/*	$NetBSD: union_subr.c,v 1.82 2022/07/18 04:30:30 thorpej Exp $	*/

/*
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
 */

/*
 * Copyright (c) 1994 Jan-Simon Pendry
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: union_subr.c,v 1.82 2022/07/18 04:30:30 thorpej Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/dirent.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/queue.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/kauth.h>

#include <uvm/uvm_extern.h>

#include <fs/union/union.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

static LIST_HEAD(uhashhead, union_node) *uhashtbl;
static u_long uhash_mask;		/* size of hash table - 1 */
#define UNION_HASH(u, l) \
	((((u_long) (u) + (u_long) (l)) >> 8) & uhash_mask)
#define NOHASH	((u_long)-1)

static kmutex_t uhash_lock;

static void union_newupper(struct union_node *, struct vnode *);
static void union_newlower(struct union_node *, struct vnode *);
static void union_ref(struct union_node *);
static void union_rele(struct union_node *);
static int union_do_lookup(struct vnode *, struct componentname *, kauth_cred_t, const char *);
int union_vn_close(struct vnode *, int, kauth_cred_t, struct lwp *);
static void union_dircache_r(struct vnode *, struct vnode ***, int *);
struct vnode *union_dircache(struct vnode *, struct lwp *);

void
union_init(void)
{

	mutex_init(&uhash_lock, MUTEX_DEFAULT, IPL_NONE);
	uhashtbl = hashinit(desiredvnodes, HASH_LIST, true, &uhash_mask);
}

void
union_reinit(void)
{
	struct union_node *un;
	struct uhashhead *oldhash, *hash;
	u_long oldmask, mask, val;
	int i;

	hash = hashinit(desiredvnodes, HASH_LIST, true, &mask);
	mutex_enter(&uhash_lock);
	oldhash = uhashtbl;
	oldmask = uhash_mask;
	uhashtbl = hash;
	uhash_mask = mask;
	for (i = 0; i <= oldmask; i++) {
		while ((un = LIST_FIRST(&oldhash[i])) != NULL) {
			LIST_REMOVE(un, un_cache);
			val = UNION_HASH(un->un_uppervp, un->un_lowervp);
			LIST_INSERT_HEAD(&hash[val], un, un_cache);
		}
	}
	mutex_exit(&uhash_lock);
	hashdone(oldhash, HASH_LIST, oldmask);
}

/*
 * Free global unionfs resources.
 */
void
union_done(void)
{

	hashdone(uhashtbl, HASH_LIST, uhash_mask);
	mutex_destroy(&uhash_lock);

	/* Make sure to unset the readdir hook. */
	vn_union_readdir_hook = NULL;
}

void
union_newlower(struct union_node *un, struct vnode *lowervp)
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(un->un_uppervp, lowervp);

	if (un->un_lowervp == lowervp)
		return;

	KASSERT(VOP_ISLOCKED(UNIONTOV(un)) == LK_EXCLUSIVE);
	KASSERT(un->un_lowervp == NULL);

	mutex_enter(&uhash_lock);

	if (ohash != nhash && (un->un_cflags & UN_CACHED)) {
		un->un_cflags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}
	mutex_enter(&un->un_lock);
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	mutex_exit(&un->un_lock);
	if (ohash != nhash) {
		LIST_INSERT_HEAD(&uhashtbl[nhash], un, un_cache);
		un->un_cflags |= UN_CACHED;
	}

	mutex_exit(&uhash_lock);
}

void
union_newupper(struct union_node *un, struct vnode *uppervp)
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, un->un_lowervp);
	struct vop_lock_args lock_ap;
	struct vop_unlock_args unlock_ap;
	int error __diagused;

	if (un->un_uppervp == uppervp)
		return;

	KASSERT(VOP_ISLOCKED(UNIONTOV(un)) == LK_EXCLUSIVE);
	KASSERT(un->un_uppervp == NULL);

	/*
	 * We have to transfer the vnode lock from the union vnode to
	 * the upper vnode.  Lock the upper vnode first.  We cannot use
	 * VOP_LOCK() here as it would break the fstrans state.
	 */
	lock_ap.a_desc = VDESC(vop_lock);
	lock_ap.a_vp = uppervp;
	lock_ap.a_flags = LK_EXCLUSIVE;
	error = VCALL(lock_ap.a_vp, VOFFSET(vop_lock), &lock_ap);
	KASSERT(error == 0);

	mutex_enter(&uhash_lock);

	if (ohash != nhash && (un->un_cflags & UN_CACHED)) {
		un->un_cflags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}
	mutex_enter(&un->un_lock);
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	/*
	 * With the upper vnode in place unlock the union vnode to
	 * finalize the lock transfer.
	 */
	unlock_ap.a_desc = VDESC(vop_unlock);
	unlock_ap.a_vp = UNIONTOV(un);
	genfs_unlock(&unlock_ap);
	/* Update union vnode interlock, vmobjlock & klist. */
	vshareilock(UNIONTOV(un), uppervp);
	rw_obj_hold(uppervp->v_uobj.vmobjlock);
	uvm_obj_setlock(&UNIONTOV(un)->v_uobj, uppervp->v_uobj.vmobjlock);
	vshareklist(UNIONTOV(un), uppervp);
	mutex_exit(&un->un_lock);
	if (ohash != nhash) {
		LIST_INSERT_HEAD(&uhashtbl[nhash], un, un_cache);
		un->un_cflags |= UN_CACHED;
	}

	mutex_exit(&uhash_lock);
}

/*
 * Keep track of size changes in the underlying vnodes.
 * If the size changes, then callback to the vm layer
 * giving priority to the upper layer size.
 *
 * Mutex un_lock held on entry and released on return.
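 *
 * A size of VNOVAL for either layer means that layer's size is
 * unknown or did not change.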
 */
void
union_newsize(struct vnode *vp, off_t uppersz, off_t lowersz)
{
	struct union_node *un = VTOUNION(vp);
	off_t sz;

	KASSERT(mutex_owned(&un->un_lock));
	/* only interested in regular files */
	if (vp->v_type != VREG) {
		mutex_exit(&un->un_lock);
		uvm_vnp_setsize(vp, 0);
		return;
	}

	sz = VNOVAL;

	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
		un->un_uppersz = uppersz;
		if (sz == VNOVAL)
			sz = un->un_uppersz;
	}

	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
		un->un_lowersz = lowersz;
		if (sz == VNOVAL)
			sz = un->un_lowersz;
	}
	mutex_exit(&un->un_lock);

	if (sz != VNOVAL) {
#ifdef UNION_DIAGNOSTIC
		printf("union: %s size now %qd\n",
		    uppersz != VNOVAL ? "upper" : "lower", sz);
#endif
		uvm_vnp_setsize(vp, sz);
	}
}

static void
union_ref(struct union_node *un)
{

	KASSERT(mutex_owned(&uhash_lock));
	un->un_refs++;
}

static void
union_rele(struct union_node *un)
{

	mutex_enter(&uhash_lock);
	un->un_refs--;
	if (un->un_refs > 0) {
		mutex_exit(&uhash_lock);
		return;
	}
	if (un->un_cflags & UN_CACHED) {
		un->un_cflags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}
	mutex_exit(&uhash_lock);

	if (un->un_pvp != NULLVP)
		vrele(un->un_pvp);
	if (un->un_uppervp != NULLVP)
		vrele(un->un_uppervp);
	if (un->un_lowervp != NULLVP)
		vrele(un->un_lowervp);
	if (un->un_dirvp != NULLVP)
		vrele(un->un_dirvp);
	if (un->un_path)
		free(un->un_path, M_TEMP);
	mutex_destroy(&un->un_lock);

	free(un, M_TEMP);
}

/*
 * allocate a union_node/vnode pair.  the vnode is
 * referenced and unlocked.  the new vnode is returned
 * via (vpp).  (mp) is the mountpoint of the union filesystem,
 * (dvp) is the parent directory where the upper layer object
 * should exist (but doesn't) and (cnp) is the componentname
 * information which is partially copied to allow the upper
 * layer object to be created at a later time.  (uppervp)
 * and (lowervp) reference the upper and lower layer objects
 * being mapped.  either, but not both, can be nil.
 * both, if supplied, are unlocked.
 * the reference is either maintained in the new union_node
 * object which is allocated, or they are vrele'd.
 *
 * all union_nodes are maintained on a hash
 * list.  new nodes are only allocated when they cannot
 * be found on this list.  entries on the list are
 * removed when the vfs reclaim entry is called.
 *
 * the vnode gets attached or referenced with vcache_get().
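 *
 * the hash lookup tries the (uppervp, lowervp) bucket first; when both
 * layers are supplied it also probes the (uppervp, NULL) and
 * (NULL, lowervp) buckets, so that a node allocated before its other
 * layer became known is still found.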
 */
int
union_allocvp(
	struct vnode **vpp,
	struct mount *mp,
	struct vnode *undvp,		/* parent union vnode */
	struct vnode *dvp,		/* may be null */
	struct componentname *cnp,	/* may be null */
	struct vnode *uppervp,		/* may be null */
	struct vnode *lowervp,		/* may be null */
	int docache)
{
	int error;
	struct union_node *un = NULL, *un1;
	struct vnode *vp, *xlowervp = NULLVP;
	u_long hash[3];
	int try;
	bool is_dotdot;

	is_dotdot = (dvp != NULL && cnp != NULL && (cnp->cn_flags & ISDOTDOT));

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		xlowervp = lowervp;
		lowervp = NULLVP;
	}

	/*
	 * If both uppervp and lowervp are not NULL we have to
	 * search union nodes with one vnode as NULL too.
	 */
	hash[0] = UNION_HASH(uppervp, lowervp);
	if (uppervp == NULL || lowervp == NULL) {
		hash[1] = hash[2] = NOHASH;
	} else {
		hash[1] = UNION_HASH(uppervp, NULLVP);
		hash[2] = UNION_HASH(NULLVP, lowervp);
	}

	if (!docache) {
		un = NULL;
		goto found;
	}

loop:
	mutex_enter(&uhash_lock);

	for (try = 0; try < 3; try++) {
		if (hash[try] == NOHASH)
			continue;
		LIST_FOREACH(un, &uhashtbl[hash[try]], un_cache) {
			if ((un->un_lowervp && un->un_lowervp != lowervp) ||
			    (un->un_uppervp && un->un_uppervp != uppervp) ||
			    un->un_mount != mp)
				continue;

			union_ref(un);
			mutex_exit(&uhash_lock);
			error = vcache_get(mp, &un, sizeof(un), &vp);
			KASSERT(error != 0 || UNIONTOV(un) == vp);
			union_rele(un);
			if (error == ENOENT)
				goto loop;
			else if (error)
				goto out;
			goto found;
		}
	}

	mutex_exit(&uhash_lock);

found:
	if (un) {
		if (uppervp != dvp) {
			if (is_dotdot)
				VOP_UNLOCK(dvp);
			vn_lock(UNIONTOV(un), LK_EXCLUSIVE | LK_RETRY);
			if (is_dotdot)
				vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		}
		/*
		 * Save information about the upper layer.
		 */
		if (uppervp != un->un_uppervp) {
			union_newupper(un, uppervp);
		} else if (uppervp) {
			vrele(uppervp);
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_path = malloc(cnp->cn_namelen+1,
						M_TEMP, M_WAITOK);
				memcpy(un->un_path, cnp->cn_nameptr,
						cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
				vref(dvp);
				un->un_dirvp = dvp;
			}
		} else if (lowervp) {
			vrele(lowervp);
		}
		*vpp = UNIONTOV(un);
		if (uppervp != dvp)
			VOP_UNLOCK(*vpp);
		error = 0;
		goto out;
	}

	un = malloc(sizeof(struct union_node), M_TEMP, M_WAITOK);
	mutex_init(&un->un_lock, MUTEX_DEFAULT, IPL_NONE);
	un->un_refs = 1;
	un->un_mount = mp;
	un->un_vnode = NULL;
	un->un_uppervp = uppervp;
	un->un_lowervp = lowervp;
	un->un_pvp = undvp;
	if (undvp != NULLVP)
		vref(undvp);
	un->un_dircache = 0;
	un->un_openl = 0;
	un->un_cflags = 0;
	un->un_hooknode = false;

	un->un_uppersz = VNOVAL;
	un->un_lowersz = VNOVAL;

	if (dvp && cnp && (lowervp != NULLVP)) {
		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		memcpy(un->un_path, cnp->cn_nameptr, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
		vref(dvp);
		un->un_dirvp = dvp;
	} else {
		un->un_path = 0;
		un->un_dirvp = 0;
	}

	if (docache) {
		mutex_enter(&uhash_lock);
		LIST_FOREACH(un1, &uhashtbl[hash[0]], un_cache) {
			if (un1->un_lowervp == lowervp &&
			    un1->un_uppervp == uppervp &&
			    un1->un_mount == mp) {
				/*
				 * Another thread beat us, push back freshly
				 * allocated node and retry.
				 */
				mutex_exit(&uhash_lock);
				union_rele(un);
				goto loop;
			}
		}
		LIST_INSERT_HEAD(&uhashtbl[hash[0]], un, un_cache);
		un->un_cflags |= UN_CACHED;
		mutex_exit(&uhash_lock);
	}

	error = vcache_get(mp, &un, sizeof(un), vpp);
	KASSERT(error != 0 || UNIONTOV(un) == *vpp);
	union_rele(un);
	if (error == ENOENT)
		goto loop;

out:
	if (xlowervp)
		vrele(xlowervp);

	return error;
}

int
union_freevp(struct vnode *vp)
{
	struct union_node *un = VTOUNION(vp);

	/* Detach vnode from union node. */
	un->un_vnode = NULL;
	un->un_uppersz = VNOVAL;
	un->un_lowersz = VNOVAL;

	/* Detach union node from vnode. */
	mutex_enter(vp->v_interlock);
	vp->v_data = NULL;
	mutex_exit(vp->v_interlock);

	union_rele(un);

	return 0;
}

int
union_loadvnode(struct mount *mp, struct vnode *vp,
    const void *key, size_t key_len, const void **new_key)
{
	struct vattr va;
	struct vnode *svp;
	struct union_node *un;
	struct union_mount *um;
	voff_t uppersz, lowersz;

	KASSERT(key_len == sizeof(un));
	memcpy(&un, key, key_len);

	um = MOUNTTOUNIONMOUNT(mp);
	svp = (un->un_uppervp != NULLVP) ?
	    un->un_uppervp : un->un_lowervp;

	vp->v_tag = VT_UNION;
	vp->v_op = union_vnodeop_p;
	vp->v_data = un;
	un->un_vnode = vp;

	vp->v_type = svp->v_type;
	if (svp->v_type == VCHR || svp->v_type == VBLK)
		spec_node_init(vp, svp->v_rdev);

	vshareilock(vp, svp);
	rw_obj_hold(svp->v_uobj.vmobjlock);
	uvm_obj_setlock(&vp->v_uobj, svp->v_uobj.vmobjlock);
	vshareklist(vp, svp);

	/* detect the root vnode (and aliases) */
	if ((un->un_uppervp == um->um_uppervp) &&
	    ((un->un_lowervp == NULLVP) || un->un_lowervp == um->um_lowervp)) {
		if (un->un_lowervp == NULLVP) {
			un->un_lowervp = um->um_lowervp;
			if (un->un_lowervp != NULLVP)
				vref(un->un_lowervp);
		}
		vp->v_vflag |= VV_ROOT;
	}

	uppersz = lowersz = VNOVAL;
	if (un->un_uppervp != NULLVP) {
		if (vn_lock(un->un_uppervp, LK_SHARED) == 0) {
			if (VOP_GETATTR(un->un_uppervp, &va, FSCRED) == 0)
				uppersz = va.va_size;
			VOP_UNLOCK(un->un_uppervp);
		}
	}
	if (un->un_lowervp != NULLVP) {
		if (vn_lock(un->un_lowervp, LK_SHARED) == 0) {
			if (VOP_GETATTR(un->un_lowervp, &va, FSCRED) == 0)
				lowersz = va.va_size;
			VOP_UNLOCK(un->un_lowervp);
		}
	}

	mutex_enter(&un->un_lock);
	union_newsize(vp, uppersz, lowersz);

	mutex_enter(&uhash_lock);
	union_ref(un);
	mutex_exit(&uhash_lock);

	*new_key = &vp->v_data;

	return 0;
}

/*
 * copyfile.  copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes.  both (fvp)
 * and (tvp) are locked on entry and exit.
 */
int
union_copyfile(struct vnode *fvp, struct vnode *tvp, kauth_cred_t cred,
	struct lwp *l)
{
	char *tbuf;
	struct uio uio;
	struct iovec iov;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXBSIZE.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	uio.uio_offset = 0;
	UIO_SETUP_SYSSPACE(&uio);

	tbuf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = tbuf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;
		error = VOP_READ(fvp, &uio, 0, cred);

		if (error == 0) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = tbuf;
			iov.iov_len = MAXBSIZE - uio.uio_resid;
			uio.uio_offset = offset;
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;

			if (uio.uio_resid == 0)
				break;

			do {
				error = VOP_WRITE(tvp, &uio, 0, cred);
			} while ((uio.uio_resid > 0) && (error == 0));
		}

	} while (error == 0);

	free(tbuf, M_TEMP);
	return (error);
}

/*
 * (un) is assumed to be locked on entry and remains
 * locked on exit.
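 *
 * If (docopy) is set, the contents and the mode/flags of the lower
 * vnode are copied to a freshly created upper vnode; otherwise only
 * an empty upper shadow file is created.  Any opens recorded against
 * the lower vnode are transferred to the upper vnode afterwards.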
 */
int
union_copyup(struct union_node *un, int docopy, kauth_cred_t cred,
	struct lwp *l)
{
	int error;
	struct vnode *lvp, *uvp;
	struct vattr lvattr, uvattr;

	error = union_vn_create(&uvp, un, l);
	if (error)
		return (error);

	union_newupper(un, uvp);

	lvp = un->un_lowervp;

	if (docopy) {
		/*
		 * XX - should not ignore errors
		 * from VOP_CLOSE
		 */
		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);

		error = VOP_GETATTR(lvp, &lvattr, cred);
		if (error == 0)
			error = VOP_OPEN(lvp, FREAD, cred);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, l);
			(void) VOP_CLOSE(lvp, FREAD, cred);
		}
		if (error == 0) {
			/* Copy permissions up too */
			vattr_null(&uvattr);
			uvattr.va_mode = lvattr.va_mode;
			uvattr.va_flags = lvattr.va_flags;
			error = VOP_SETATTR(uvp, &uvattr, cred);
		}
		VOP_UNLOCK(lvp);
#ifdef UNION_DIAGNOSTIC
		if (error == 0)
			uprintf("union: copied up %s\n", un->un_path);
#endif

	}
	union_vn_close(uvp, FWRITE, cred, l);

	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its reference counts right.  This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
		for (i = 0; i < un->un_openl; i++) {
			(void) VOP_CLOSE(lvp, FREAD, cred);
			(void) VOP_OPEN(uvp, FREAD, cred);
		}
		un->un_openl = 0;
		VOP_UNLOCK(lvp);
	}

	return (error);

}

/*
 * Prepare the creation of a new node in the upper layer.
 *
 * (dvp) is the directory in which to create the new node.
 * it is locked on entry and exit.
 * (cnp) is the componentname to be created.
 * (cred) and (path) are the credentials and path used to fill (cnp).
 */
static int
union_do_lookup(struct vnode *dvp, struct componentname *cnp, kauth_cred_t cred,
    const char *path)
{
	int error;
	struct vnode *vp;

	cnp->cn_nameiop = CREATE;
	cnp->cn_flags = LOCKPARENT | ISLASTCN;
	cnp->cn_cred = cred;
	cnp->cn_nameptr = path;
	cnp->cn_namelen = strlen(path);

	error = VOP_LOOKUP(dvp, &vp, cnp);

	if (error == 0) {
		KASSERT(vp != NULL);
		VOP_ABORTOP(dvp, cnp);
		vrele(vp);
		error = EEXIST;
	} else if (error == EJUSTRETURN) {
		error = 0;
	}

	return error;
}

/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to the
 * mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory.
 * it is unlocked on entry and exit.
 * (cnp) is the componentname to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked.
 *
 * N.B. We still attempt to create shadow directories even if the union
 * is mounted read-only, which is a little nonintuitive.
 */
int
union_mkshadow(struct union_mount *um, struct vnode *dvp,
	struct componentname *cnp, struct vnode **vpp)
{
	int error;
	struct vattr va;
	struct componentname cn;
	char *pnbuf;

	if (cnp->cn_namelen + 1 > MAXPATHLEN)
		return ENAMETOOLONG;
	pnbuf = PNBUF_GET();
	memcpy(pnbuf, cnp->cn_nameptr, cnp->cn_namelen);
	pnbuf[cnp->cn_namelen] = '\0';

	vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);

	error = union_do_lookup(dvp, &cn,
	    (um->um_op == UNMNT_ABOVE ? cnp->cn_cred : um->um_cred), pnbuf);
	if (error) {
		VOP_UNLOCK(dvp);
		PNBUF_PUT(pnbuf);
		return error;
	}

	/*
	 * policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (ie mostly identical to the
	 * mkdir syscall).  (jsp, kb)
	 */

	vattr_null(&va);
	va.va_type = VDIR;
	va.va_mode = um->um_cmode;

	KASSERT(*vpp == NULL);
	error = VOP_MKDIR(dvp, vpp, &cn, &va);
	VOP_UNLOCK(dvp);
	PNBUF_PUT(pnbuf);
	return error;
}

/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to the
 * mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * it is locked on entry and exit.
 * (cnp) is the componentname to be created.
 * (un) holds the path to be created.
 */
int
union_mkwhiteout(struct union_mount *um, struct vnode *dvp,
	struct componentname *cnp, struct union_node *un)
{
	int error;
	struct componentname cn;

	error = union_do_lookup(dvp, &cn,
	    (um->um_op == UNMNT_ABOVE ? cnp->cn_cred : um->um_cred),
	    un->un_path);
	if (error)
		return error;

	error = VOP_WHITEOUT(dvp, &cn, CREATE);
	return error;
}

/*
 * union_vn_create: creates and opens a new shadow file
 * on the upper union layer.  this function is similar
 * in spirit to calling vn_open but it avoids calling namei().
 * the problem with calling namei is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas union_do_lookup is told where to start.
 */
int
union_vn_create(struct vnode **vpp, struct union_node *un, struct lwp *l)
{
	struct vnode *vp;
	kauth_cred_t cred = l->l_cred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
	int error;
	int cmode = UN_FILEMODE & ~l->l_proc->p_cwdi->cwdi_cmask;
	struct componentname cn;

	*vpp = NULLVP;

	vn_lock(un->un_dirvp, LK_EXCLUSIVE | LK_RETRY);

	error = union_do_lookup(un->un_dirvp, &cn, l->l_cred,
	    un->un_path);
	if (error) {
		VOP_UNLOCK(un->un_dirvp);
		return error;
	}

	/*
	 * Good - there was no race to create the file
	 * so go ahead and create it.  The permissions
	 * on the file will be 0666 modified by the
	 * current user's umask.  Access to the file, while
	 * it is unioned, will require access to the top *and*
	 * bottom files.  Access when not unioned will simply
	 * require access to the top-level file.
	 * TODO: confirm choice of access permissions.
	 */
	vattr_null(vap);
	vap->va_type = VREG;
	vap->va_mode = cmode;
	vp = NULL;
	error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap);
	if (error) {
		VOP_UNLOCK(un->un_dirvp);
		return error;
	}

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	VOP_UNLOCK(un->un_dirvp);
	error = VOP_OPEN(vp, fmode, cred);
	if (error) {
		vput(vp);
		return error;
	}

	vp->v_writecount++;
	VOP_UNLOCK(vp);
	*vpp = vp;
	return 0;
}

int
union_vn_close(struct vnode *vp, int fmode, kauth_cred_t cred, struct lwp *l)
{

	if (fmode & FWRITE)
		--vp->v_writecount;
	return (VOP_CLOSE(vp, fmode, cred));
}

void
union_removed_upper(struct union_node *un)
{
	struct vnode *vp = UNIONTOV(un);

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#if 1
	/*
	 * We do not set the uppervp to NULLVP here, because lowervp
	 * may also be NULLVP, so this routine would end up creating
	 * a bogus union node with no upper or lower VP (that causes
	 * pain in many places that assume at least one VP exists).
	 * Since we've removed this node from the cache hash chains,
	 * it won't be found again.  When all current holders
	 * release it, union_inactive() will vgone() it.
	 */
	union_diruncache(un);
#else
	union_newupper(un, NULLVP);
#endif

	VOP_UNLOCK(vp);

	mutex_enter(&uhash_lock);
	if (un->un_cflags & UN_CACHED) {
		un->un_cflags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}
	mutex_exit(&uhash_lock);
}

#if 0
struct vnode *
union_lowervp(struct vnode *vp)
{
	struct union_node *un = VTOUNION(vp);

	if ((un->un_lowervp != NULLVP) &&
	    (vp->v_type == un->un_lowervp->v_type)) {
		if (vget(un->un_lowervp, 0, true /* wait */) == 0)
			return (un->un_lowervp);
	}

	return (NULLVP);
}
#endif

/*
 * determine whether a whiteout is needed
 * during a remove/rmdir operation.
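 * a whiteout is needed if the node has a lower layer vnode, or if
 * its upper layer vnode carries the opaque flag.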
 */
int
union_dowhiteout(struct union_node *un, kauth_cred_t cred)
{
	struct vattr va;

	if (un->un_lowervp != NULLVP)
		return (1);

	if (VOP_GETATTR(un->un_uppervp, &va, cred) == 0 &&
	    (va.va_flags & OPAQUE))
		return (1);

	return (0);
}

static void
union_dircache_r(struct vnode *vp, struct vnode ***vppp, int *cntp)
{
	struct union_node *un;

	if (vp->v_op != union_vnodeop_p) {
		if (vppp) {
			vref(vp);
			*(*vppp)++ = vp;
			if (--(*cntp) == 0)
				panic("union: dircache table too small");
		} else {
			(*cntp)++;
		}

		return;
	}

	un = VTOUNION(vp);
	if (un->un_uppervp != NULLVP)
		union_dircache_r(un->un_uppervp, vppp, cntp);
	if (un->un_lowervp != NULLVP)
		union_dircache_r(un->un_lowervp, vppp, cntp);
}

struct vnode *
union_dircache(struct vnode *vp, struct lwp *l)
{
	int cnt;
	struct vnode *nvp = NULLVP;
	struct vnode **vpp;
	struct vnode **dircache;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	dircache = VTOUNION(vp)->un_dircache;

	nvp = NULLVP;

	if (dircache == 0) {
		cnt = 0;
		union_dircache_r(vp, 0, &cnt);
		cnt++;
		dircache = (struct vnode **)
				malloc(cnt * sizeof(struct vnode *),
					M_TEMP, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &cnt);
		VTOUNION(vp)->un_dircache = dircache;
		*vpp = NULLVP;
		vpp = dircache + 1;
	} else {
		vpp = dircache;
		do {
			if (*vpp++ == VTOUNION(vp)->un_lowervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	vref(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0,
	    NULLVP, *vpp, 0);
	if (!error) {
		vn_lock(nvp, LK_EXCLUSIVE | LK_RETRY);
		VTOUNION(vp)->un_dircache = 0;
		VTOUNION(nvp)->un_hooknode = true;
		VTOUNION(nvp)->un_dircache = dircache;
	}

out:
	VOP_UNLOCK(vp);
	return (nvp);
}

void
union_diruncache(struct union_node *un)
{
	struct vnode **vpp;

	KASSERT(VOP_ISLOCKED(UNIONTOV(un)) == LK_EXCLUSIVE);
	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}
}

/*
 * Check whether node can rmdir (check empty).
 */
int
union_check_rmdir(struct union_node *un, kauth_cred_t cred)
{
	int dirlen, eofflag, error;
	char *dirbuf;
	struct vattr va;
	struct vnode *tvp;
	struct dirent *dp, *edp;
	struct componentname cn;
	struct iovec aiov;
	struct uio auio;

	KASSERT(un->un_uppervp != NULL);

	/* Check upper for being opaque. */
	KASSERT(VOP_ISLOCKED(un->un_uppervp));
	error = VOP_GETATTR(un->un_uppervp, &va, cred);
	if (error || (va.va_flags & OPAQUE))
		return error;

	if (un->un_lowervp == NULL)
		return 0;

	/*
	 * Check lower for being empty.
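	 * A lower layer entry is ignored if it is a whiteout, "." or
	 * "..", or if the upper layer has a whiteout covering the same
	 * name; anything else makes the directory non-empty.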
	 */
	vn_lock(un->un_lowervp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(un->un_lowervp, &va, cred);
	if (error) {
		VOP_UNLOCK(un->un_lowervp);
		return error;
	}
	dirlen = va.va_blocksize;
	dirbuf = kmem_alloc(dirlen, KM_SLEEP);
	/* error = 0; */
	eofflag = 0;
	auio.uio_offset = 0;
	do {
		aiov.iov_len = dirlen;
		aiov.iov_base = dirbuf;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_resid = aiov.iov_len;
		auio.uio_rw = UIO_READ;
		UIO_SETUP_SYSSPACE(&auio);
		error = VOP_READDIR(un->un_lowervp, &auio, cred, &eofflag,
		    NULL, NULL);
		if (error)
			break;
		edp = (struct dirent *)&dirbuf[dirlen - auio.uio_resid];
		for (dp = (struct dirent *)dirbuf;
		    error == 0 && dp < edp;
		    dp = (struct dirent *)((char *)dp + dp->d_reclen)) {
			if (dp->d_reclen == 0) {
				error = ENOTEMPTY;
				break;
			}
			if (dp->d_type == DT_WHT ||
			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
			    (dp->d_namlen == 2 && !memcmp(dp->d_name, "..", 2)))
				continue;
			/* Check for presence in the upper layer. */
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = ISLASTCN | RDONLY;
			cn.cn_cred = cred;
			cn.cn_nameptr = dp->d_name;
			cn.cn_namelen = dp->d_namlen;
			error = VOP_LOOKUP(un->un_uppervp, &tvp, &cn);
			if (error == ENOENT && (cn.cn_flags & ISWHITEOUT)) {
				error = 0;
				continue;
			}
			if (error == 0)
				vrele(tvp);
			error = ENOTEMPTY;
		}
	} while (error == 0 && !eofflag);
	kmem_free(dirbuf, dirlen);
	VOP_UNLOCK(un->un_lowervp);

	return error;
}

/*
 * This hook is called from vn_readdir() to switch to the lower
 * directory once the upper directory has been read.
 */
int
union_readdirhook(struct vnode **vpp, struct file *fp, struct lwp *l)
{
	struct vnode *vp = *vpp, *lvp;
	struct vattr va;
	int error;

	if (vp->v_op != union_vnodeop_p)
		return (0);

	/*
	 * If the directory is opaque,
	 * then don't show lower entries
	 */
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &va, fp->f_cred);
	VOP_UNLOCK(vp);
	if (error || (va.va_flags & OPAQUE))
		return error;

	if ((lvp = union_dircache(vp, l)) == NULLVP)
		return (0);

	error = VOP_OPEN(lvp, FREAD, fp->f_cred);
	if (error) {
		vput(lvp);
		return (error);
	}
	VOP_UNLOCK(lvp);
	fp->f_vnode = lvp;
	fp->f_offset = 0;
	error = vn_close(vp, FREAD, fp->f_cred);
	if (error)
		return (error);
	*vpp = lvp;
	return (0);
}