39 */ 40 41#include <sys/param.h> 42#include <sys/systm.h> 43#include <sys/fcntl.h> 44#include <sys/file.h> 45#include <sys/filedesc.h> 46#include <sys/kernel.h> 47#include <sys/lock.h> 48#include <sys/malloc.h> 49#include <sys/module.h> 50#include <sys/mount.h> 51#include <sys/mutex.h> 52#include <sys/namei.h> 53#include <sys/stat.h> 54#include <sys/vnode.h> 55 56#include <vm/vm.h> 57#include <vm/vm_extern.h> /* for vnode_pager_setsize */ 58#include <vm/vm_object.h> /* for vm cache coherency */ 59#include <vm/uma.h> 60 61#include <fs/unionfs/union.h> 62 63#include <sys/proc.h> 64 65extern int union_init(void); 66 67/* must be power of two, otherwise change UNION_HASH() */ 68#define NHASH 32 69 70/* unsigned int ... */ 71#define UNION_HASH(u, l) \ 72 (((((uintptr_t) (u)) + ((uintptr_t) l)) >> 8) & (NHASH-1)) 73 74static LIST_HEAD(unhead, union_node) unhead[NHASH]; 75static int unvplock[NHASH]; 76 77static void union_dircache_r(struct vnode *vp, struct vnode ***vppp, 78 int *cntp); 79static int union_list_lock(int ix); 80static void union_list_unlock(int ix); 81static int union_relookup(struct union_mount *um, struct vnode *dvp, 82 struct vnode **vpp, 83 struct componentname *cnp, 84 struct componentname *cn, char *path, 85 int pathlen); 86static void union_updatevp(struct union_node *un, 87 struct vnode *uppervp, 88 struct vnode *lowervp); 89static void union_newlower(struct union_node *, struct vnode *); 90static void union_newupper(struct union_node *, struct vnode *); 91static int union_copyfile(struct vnode *, struct vnode *, 92 struct ucred *, struct thread *); 93static int union_vn_create(struct vnode **, struct union_node *, 94 struct thread *); 95static int union_vn_close(struct vnode *, int, struct ucred *, 96 struct thread *); 97 98int 99union_init() 100{ 101 int i; 102 103 for (i = 0; i < NHASH; i++) 104 LIST_INIT(&unhead[i]); 105 bzero((caddr_t)unvplock, sizeof(unvplock)); 106 return (0); 107} 108 109static int 110union_list_lock(ix) 111 int ix; 112{ 
113 if (unvplock[ix] & UNVP_LOCKED) { 114 unvplock[ix] |= UNVP_WANT; 115 (void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0); 116 return (1); 117 } 118 unvplock[ix] |= UNVP_LOCKED; 119 return (0); 120} 121 122static void 123union_list_unlock(ix) 124 int ix; 125{ 126 unvplock[ix] &= ~UNVP_LOCKED; 127 128 if (unvplock[ix] & UNVP_WANT) { 129 unvplock[ix] &= ~UNVP_WANT; 130 wakeup((caddr_t) &unvplock[ix]); 131 } 132} 133 134/* 135 * union_updatevp: 136 * 137 * The uppervp, if not NULL, must be referenced and not locked by us 138 * The lowervp, if not NULL, must be referenced. 139 * 140 * if uppervp and lowervp match pointers already installed, nothing 141 * happens. The passed vp's (when matching) are not adjusted. This 142 * routine may only be called by union_newupper() and union_newlower(). 143 */ 144 145static void 146union_updatevp(un, uppervp, lowervp) 147 struct union_node *un; 148 struct vnode *uppervp; 149 struct vnode *lowervp; 150{ 151 int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp); 152 int nhash = UNION_HASH(uppervp, lowervp); 153 int docache = (lowervp != NULLVP || uppervp != NULLVP); 154 int lhash, uhash; 155 156 /* 157 * Ensure locking is ordered from lower to higher 158 * to avoid deadlocks. 
159 */ 160 if (nhash < ohash) { 161 lhash = nhash; 162 uhash = ohash; 163 } else { 164 lhash = ohash; 165 uhash = nhash; 166 } 167 168 if (lhash != uhash) { 169 while (union_list_lock(lhash)) 170 continue; 171 } 172 173 while (union_list_lock(uhash)) 174 continue; 175 176 if (ohash != nhash || !docache) { 177 if (un->un_flags & UN_CACHED) { 178 un->un_flags &= ~UN_CACHED; 179 LIST_REMOVE(un, un_cache); 180 } 181 } 182 183 if (ohash != nhash) 184 union_list_unlock(ohash); 185 186 if (un->un_lowervp != lowervp) { 187 if (un->un_lowervp) { 188 vrele(un->un_lowervp); 189 if (un->un_path) { 190 free(un->un_path, M_TEMP); 191 un->un_path = 0; 192 } 193 } 194 un->un_lowervp = lowervp; 195 un->un_lowersz = VNOVAL; 196 } 197 198 if (un->un_uppervp != uppervp) { 199 if (un->un_uppervp) 200 vrele(un->un_uppervp); 201 un->un_uppervp = uppervp; 202 un->un_uppersz = VNOVAL; 203 } 204 205 if (docache && (ohash != nhash)) { 206 LIST_INSERT_HEAD(&unhead[nhash], un, un_cache); 207 un->un_flags |= UN_CACHED; 208 } 209 210 union_list_unlock(nhash); 211} 212 213/* 214 * Set a new lowervp. The passed lowervp must be referenced and will be 215 * stored in the vp in a referenced state. 216 */ 217 218static void 219union_newlower(un, lowervp) 220 struct union_node *un; 221 struct vnode *lowervp; 222{ 223 union_updatevp(un, un->un_uppervp, lowervp); 224} 225 226/* 227 * Set a new uppervp. The passed uppervp must be locked and will be 228 * stored in the vp in a locked state. The caller should not unlock 229 * uppervp. 230 */ 231 232static void 233union_newupper(un, uppervp) 234 struct union_node *un; 235 struct vnode *uppervp; 236{ 237 union_updatevp(un, uppervp, un->un_lowervp); 238} 239 240/* 241 * Keep track of size changes in the underlying vnodes. 242 * If the size changes, then callback to the vm layer 243 * giving priority to the upper layer size. 
244 */ 245void 246union_newsize(vp, uppersz, lowersz) 247 struct vnode *vp; 248 off_t uppersz, lowersz; 249{ 250 struct union_node *un; 251 off_t sz; 252 253 /* only interested in regular files */ 254 if (vp->v_type != VREG) 255 return; 256 257 un = VTOUNION(vp); 258 sz = VNOVAL; 259 260 if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) { 261 un->un_uppersz = uppersz; 262 if (sz == VNOVAL) 263 sz = un->un_uppersz; 264 } 265 266 if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) { 267 un->un_lowersz = lowersz; 268 if (sz == VNOVAL) 269 sz = un->un_lowersz; 270 } 271 272 if (sz != VNOVAL) { 273 UDEBUG(("union: %s size now %ld\n", 274 (uppersz != VNOVAL ? "upper" : "lower"), (long)sz)); 275 /* 276 * There is no need to change size of non-existent object 277 */ 278 /* vnode_pager_setsize(vp, sz); */ 279 } 280} 281 282/* 283 * union_allocvp: allocate a union_node and associate it with a 284 * parent union_node and one or two vnodes. 285 * 286 * vpp Holds the returned vnode locked and referenced if no 287 * error occurs. 288 * 289 * mp Holds the mount point. mp may or may not be busied. 290 * allocvp makes no changes to mp. 291 * 292 * dvp Holds the parent union_node to the one we wish to create. 293 * XXX may only be used to traverse an uncopied lowervp-based 294 * tree? XXX 295 * 296 * dvp may or may not be locked. allocvp makes no changes 297 * to dvp. 298 * 299 * upperdvp Holds the parent vnode to uppervp, generally used along 300 * with path component information to create a shadow of 301 * lowervp when uppervp does not exist. 302 * 303 * upperdvp is referenced but unlocked on entry, and will be 304 * dereferenced on return. 305 * 306 * uppervp Holds the new uppervp vnode to be stored in the 307 * union_node we are allocating. uppervp is referenced but 308 * not locked, and will be dereferenced on return. 309 * 310 * lowervp Holds the new lowervp vnode to be stored in the 311 * union_node we are allocating. 
lowervp is referenced but 312 * not locked, and will be dereferenced on return. 313 * 314 * cnp Holds path component information to be coupled with 315 * lowervp and upperdvp to allow unionfs to create an uppervp 316 * later on. Only used if lowervp is valid. The conents 317 * of cnp is only valid for the duration of the call. 318 * 319 * docache Determine whether this node should be entered in the 320 * cache or whether it should be destroyed as soon as possible. 321 * 322 * all union_nodes are maintained on a singly-linked 323 * list. new nodes are only allocated when they cannot 324 * be found on this list. entries on the list are 325 * removed when the vfs reclaim entry is called. 326 * 327 * a single lock is kept for the entire list. this is 328 * needed because the getnewvnode() function can block 329 * waiting for a vnode to become free, in which case there 330 * may be more than one process trying to get the same 331 * vnode. this lock is only taken if we are going to 332 * call getnewvnode, since the kernel itself is single-threaded. 333 * 334 * if an entry is found on the list, then call vget() to 335 * take a reference. this is done because there may be 336 * zero references to it and so it needs to removed from 337 * the vnode free list. 338 */ 339 340int 341union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache) 342 struct vnode **vpp; 343 struct mount *mp; 344 struct vnode *dvp; /* parent union vnode */ 345 struct vnode *upperdvp; /* parent vnode of uppervp */ 346 struct componentname *cnp; /* may be null */ 347 struct vnode *uppervp; /* may be null */ 348 struct vnode *lowervp; /* may be null */ 349 int docache; 350{ 351 int error; 352 struct union_node *un = 0; 353 struct union_mount *um = MOUNTTOUNIONMOUNT(mp); 354 struct thread *td = (cnp) ? 
cnp->cn_thread : curthread; 355 int hash = 0; 356 int vflag; 357 int try; 358 359 if (uppervp == NULLVP && lowervp == NULLVP) 360 panic("union: unidentifiable allocation"); 361 362 if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) { 363 vrele(lowervp); 364 lowervp = NULLVP; 365 } 366 367 /* detect the root vnode (and aliases) */ 368 vflag = 0; 369 if ((uppervp == um->um_uppervp) && 370 ((lowervp == NULLVP) || lowervp == um->um_lowervp)) { 371 if (lowervp == NULLVP) { 372 lowervp = um->um_lowervp; 373 if (lowervp != NULLVP) 374 VREF(lowervp); 375 } 376 vflag = VV_ROOT; 377 } 378 379loop: 380 if (!docache) { 381 un = 0; 382 } else for (try = 0; try < 3; try++) { 383 switch (try) { 384 case 0: 385 if (lowervp == NULLVP) 386 continue; 387 hash = UNION_HASH(uppervp, lowervp); 388 break; 389 390 case 1: 391 if (uppervp == NULLVP) 392 continue; 393 hash = UNION_HASH(uppervp, NULLVP); 394 break; 395 396 case 2: 397 if (lowervp == NULLVP) 398 continue; 399 hash = UNION_HASH(NULLVP, lowervp); 400 break; 401 } 402 403 while (union_list_lock(hash)) 404 continue; 405 406 LIST_FOREACH(un, &unhead[hash], un_cache) { 407 if ((un->un_lowervp == lowervp || 408 un->un_lowervp == NULLVP) && 409 (un->un_uppervp == uppervp || 410 un->un_uppervp == NULLVP) && 411 (UNIONTOV(un)->v_mount == mp)) { 412 if (vget(UNIONTOV(un), 0, 413 cnp ? cnp->cn_thread : NULL)) { 414 union_list_unlock(hash); 415 goto loop; 416 } 417 break; 418 } 419 } 420 421 union_list_unlock(hash); 422 423 if (un) 424 break; 425 } 426 427 if (un) { 428 /* 429 * Obtain a lock on the union_node. Everything is unlocked 430 * except for dvp, so check that case. If they match, our 431 * new un is already locked. Otherwise we have to lock our 432 * new un. 433 * 434 * A potential deadlock situation occurs when we are holding 435 * one lock while trying to get another. We must follow 436 * strict ordering rules to avoid it. 
We try to locate dvp 437 * by scanning up from un_vnode, since the most likely 438 * scenario is un being under dvp. 439 */ 440 441 if (dvp && un->un_vnode != dvp) { 442 struct vnode *scan = un->un_vnode; 443 444 do { 445 scan = VTOUNION(scan)->un_pvp;
|
550 if (error) { 551 /* 552 * If an error occurs clear out vnodes. 553 */ 554 if (lowervp) 555 vrele(lowervp); 556 if (uppervp) 557 vrele(uppervp); 558 if (upperdvp) 559 vrele(upperdvp); 560 *vpp = NULL; 561 goto out; 562 } 563 564 MALLOC((*vpp)->v_data, void *, sizeof(struct union_node), 565 M_TEMP, M_WAITOK); 566 567 ASSERT_VOP_LOCKED(*vpp, "union_allocvp"); 568 (*vpp)->v_vflag |= vflag; 569 if (uppervp) 570 (*vpp)->v_type = uppervp->v_type; 571 else 572 (*vpp)->v_type = lowervp->v_type; 573 574 un = VTOUNION(*vpp); 575 bzero(un, sizeof(*un)); 576 577 lockinit(&un->un_lock, PVFS, "unlock", VLKTIMEOUT, 0); 578 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td); 579 580 un->un_vnode = *vpp; 581 un->un_uppervp = uppervp; 582 un->un_uppersz = VNOVAL; 583 un->un_lowervp = lowervp; 584 un->un_lowersz = VNOVAL; 585 un->un_dirvp = upperdvp; 586 un->un_pvp = dvp; /* only parent dir in new allocation */ 587 if (dvp != NULLVP) 588 VREF(dvp); 589 un->un_dircache = 0; 590 un->un_openl = 0; 591 592 if (cnp && (lowervp != NULLVP)) { 593 un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK); 594 bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); 595 un->un_path[cnp->cn_namelen] = '\0'; 596 } else { 597 un->un_path = 0; 598 un->un_dirvp = NULL; 599 } 600 601 if (docache) { 602 LIST_INSERT_HEAD(&unhead[hash], un, un_cache); 603 un->un_flags |= UN_CACHED; 604 } 605 606out: 607 if (docache) 608 union_list_unlock(hash); 609 610 return (error); 611} 612 613int 614union_freevp(vp) 615 struct vnode *vp; 616{ 617 struct union_node *un = VTOUNION(vp); 618 619 if (un->un_flags & UN_CACHED) { 620 un->un_flags &= ~UN_CACHED; 621 LIST_REMOVE(un, un_cache); 622 } 623 624 if (un->un_pvp != NULLVP) { 625 vrele(un->un_pvp); 626 un->un_pvp = NULL; 627 } 628 if (un->un_uppervp != NULLVP) { 629 vrele(un->un_uppervp); 630 un->un_uppervp = NULL; 631 } 632 if (un->un_lowervp != NULLVP) { 633 vrele(un->un_lowervp); 634 un->un_lowervp = NULL; 635 } 636 if (un->un_dirvp != NULLVP) { 637 
		vrele(un->un_dirvp);
		un->un_dirvp = NULL;
	}
	if (un->un_path) {
		free(un->un_path, M_TEMP);
		un->un_path = NULL;
	}
	lockdestroy(&un->un_lock);

	FREE(vp->v_data, M_TEMP);
	vp->v_data = 0;

	return (0);
}

/*
 * copyfile.  copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes.  both (fvp)
 * and (tvp) are locked on entry and exit.
 *
 * fvp and tvp are both exclusive locked on call, but their refcount's
 * haven't been bumped at all.
 *
 * Returns 0 on success or the first VOP_READ/VOP_WRITE error.  The
 * transfer buffer is always freed before returning.
 */
static int
union_copyfile(fvp, tvp, cred, td)
	struct vnode *fvp;
	struct vnode *tvp;
	struct ucred *cred;
	struct thread *td;
{
	char *buf;
	struct uio uio;
	struct iovec iov;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXBSIZE.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	bzero(&uio, sizeof(uio));

	uio.uio_td = td;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	VOP_LEASE(fvp, td, cred, LEASE_READ);
	VOP_LEASE(tvp, td, cred, LEASE_WRITE);

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;
		int count;
		int bufoffset;

		/*
		 * Setup for big read
		 */
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;

		if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0)
			break;

		/*
		 * Get bytes read, handle read eof case and setup for
		 * write loop
		 */
		if ((count = MAXBSIZE - uio.uio_resid) == 0)
			break;
		bufoffset = 0;

		/*
		 * Write until an error occurs or our buffer has been
		 * exhausted, then update the offset for the next read.
		 * VOP_WRITE may consume only part of the request; the
		 * residual count tells us how far we actually got.
		 */
		while (bufoffset < count) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf + bufoffset;
			iov.iov_len = count - bufoffset;
			uio.uio_offset = offset + bufoffset;
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;

			if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0)
				break;
			bufoffset += (count - bufoffset) - uio.uio_resid;
		}
		uio.uio_offset = offset + bufoffset;
	} while (error == 0);

	free(buf, M_TEMP);
	return (error);
}

/*
 * union_copyup:
 *
 *	Create the shadow file in the upper layer for (un) and, when
 *	(docopy) is set, copy the lower file's contents into it.
 *
 * un's vnode is assumed to be locked on entry and remains locked on exit.
 */

int
union_copyup(un, docopy, cred, td)
	struct union_node *un;
	int docopy;
	struct ucred *cred;
	struct thread *td;
{
	int error;
	struct mount *mp;
	struct vnode *lvp, *uvp;

	/*
	 * If the user does not have read permission, the vnode should not
	 * be copied to upper layer.
	 */
	vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, td);
	error = VOP_ACCESS(un->un_lowervp, VREAD, cred, td);
	VOP_UNLOCK(un->un_lowervp, 0, td);
	if (error)
		return (error);

	if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	if ((error = union_vn_create(&uvp, un, td)) != 0) {
		vn_finished_write(mp);
		return (error);
	}

	lvp = un->un_lowervp;

	KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
	if (docopy) {
		/*
		 * XX - should not ignore errors
		 * from VOP_CLOSE
		 */
		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td);
		error = VOP_OPEN(lvp, FREAD, cred, td);
		if (error == 0 && vn_canvmio(lvp) == TRUE)
			error = vfs_object_create(lvp, td, cred);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, td);
			VOP_UNLOCK(lvp, 0, td);
			(void) VOP_CLOSE(lvp, FREAD, cred, td);
		}
		if (error == 0)
			UDEBUG(("union: copied up %s\n", un->un_path));

	}
	VOP_UNLOCK(uvp, 0, td);
	vn_finished_write(mp);
	/* Install uvp as the node's upper vnode (consumes our lock state). */
	union_newupper(un, uvp);
	KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
	union_vn_close(uvp, FWRITE, cred, td);
	KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its references counts right.  This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		for (i = 0; i < un->un_openl; i++) {
			(void) VOP_CLOSE(lvp, FREAD, cred, td);
			(void) VOP_OPEN(uvp, FREAD, cred, td);
		}
		if (un->un_openl) {
			if (vn_canvmio(uvp) == TRUE)
				error = vfs_object_create(uvp, td, cred);
		}
		un->un_openl = 0;
	}

	return (error);

}

/*
 * union_relookup:
 *
 *	dvp should be locked on entry and will be locked on return.  No
 *	net change in the ref count will occur.
 *
 *	If an error is returned, *vpp will be invalid, otherwise it
 *	will hold a locked, referenced vnode.  If *vpp == dvp then
 *	remember that only one exclusive lock is held.
 */

static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
	struct union_mount *um;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct componentname *cn;
	char *path;
	int pathlen;
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for.  This must duplicate
	 * some of the work done by NDINIT, some of the work done
	 * by namei, some of the work done by lookup and some of
	 * the work done by VOP_LOOKUP when given a CREATE flag.
	 * Conclusion: Horrible.
	 *
	 * NOTE(review): (pathlen + 1) bytes are copied into a namei_zone
	 * buffer with no explicit bound check here — presumably all
	 * callers guarantee pathlen < MAXPATHLEN; verify.
	 */
	cn->cn_namelen = pathlen;
	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
	cn->cn_pnbuf[cn->cn_namelen] = '\0';

	cn->cn_nameiop = CREATE;
	cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN);
	cn->cn_thread = cnp->cn_thread;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_consume = cnp->cn_consume;

	VREF(dvp);
	VOP_UNLOCK(dvp, 0, cnp->cn_thread);

	/*
	 * Pass dvp unlocked and referenced on call to relookup().
	 *
	 * If an error occurs, dvp will be returned unlocked and dereferenced.
	 */

	if ((error = relookup(dvp, vpp, cn)) != 0) {
		/* Restore dvp's entry lock state before failing. */
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_thread);
		return(error);
	}

	/*
	 * If no error occurs, dvp will be returned locked with the reference
	 * left as before, and vpp will be returned referenced and locked.
	 *
	 * We want to return with dvp as it was passed to us, so we get
	 * rid of our reference.
	 */
	vrele(dvp);
	return (0);
}

/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to the
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory,
 * it is locked (but not ref'd) on entry and return.
 * (cnp) is the componentname to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked and ref'd
 *
 * Returns 0 on success, EEXIST if the name already exists in the upper
 * layer, or an error from union_relookup()/VOP_MKDIR().
 */
int
union_mkshadow(um, dvp, cnp, vpp)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	struct vnode **vpp;
{
	int error;
	struct vattr va;
	struct thread *td = cnp->cn_thread;
	struct componentname cn;
	struct mount *mp;

	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	if ((error = union_relookup(um, dvp, vpp, cnp, &cn,
			cnp->cn_nameptr, cnp->cn_namelen)) != 0) {
		vn_finished_write(mp);
		return (error);
	}

	/* Someone beat us to it: the name already exists in the upper layer. */
	if (*vpp) {
		if (cn.cn_flags & HASBUF) {
			uma_zfree(namei_zone, cn.cn_pnbuf);
			cn.cn_flags &= ~HASBUF;
		}
		/* relookup() may return dvp itself with one extra ref only. */
		if (dvp == *vpp)
			vrele(*vpp);
		else
			vput(*vpp);
		vn_finished_write(mp);
		*vpp = NULLVP;
		return (EEXIST);
	}

	/*
	 * policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (ie mostly identical to the
	 * mkdir syscall).  (jsp, kb)
	 */

	VATTR_NULL(&va);
	va.va_type = VDIR;
	va.va_mode = um->um_cmode;

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, td, cn.cn_cred, LEASE_WRITE);

	error = VOP_MKDIR(dvp, vpp, &cn, &va);
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}
	/*vput(dvp);*/
	vn_finished_write(mp);
	return (error);
}

/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to the
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * it is locked on entry and return.
 * (cnp) is the componentname to be created.
 *
 * Returns 0 on success, EEXIST if (path) already exists in the upper
 * layer, or an error from union_relookup()/VOP_WHITEOUT().
 */
int
union_mkwhiteout(um, dvp, cnp, path)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	char *path;
{
	int error;
	struct thread *td = cnp->cn_thread;
	struct vnode *wvp;
	struct componentname cn;
	struct mount *mp;

	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
	if (error) {
		vn_finished_write(mp);
		return (error);
	}

	/* The name already exists; a whiteout cannot be created over it. */
	if (wvp) {
		if (cn.cn_flags & HASBUF) {
			uma_zfree(namei_zone, cn.cn_pnbuf);
			cn.cn_flags &= ~HASBUF;
		}
		/* relookup() may hand back dvp itself with only an extra ref. */
		if (wvp == dvp)
			vrele(wvp);
		else
			vput(wvp);
		vn_finished_write(mp);
		return (EEXIST);
	}

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE);

	error = VOP_WHITEOUT(dvp, &cn, CREATE);
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}
	vn_finished_write(mp);
	return (error);
}

/*
 * union_vn_create: creates and opens a new shadow file
 * on the upper union layer.  this function is similar
 * in spirit to calling vn_open but it avoids calling namei().
 * the problem with calling namei is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas relookup is told where to start.
 *
 * On entry, the vnode associated with un is locked.  It remains locked
 * on return.
 *
 * If no error occurs, *vpp contains a locked referenced vnode for your
 * use.  If an error occurs *vpp is undefined.
1040 */ 1041static int 1042union_vn_create(vpp, un, td) 1043 struct vnode **vpp; 1044 struct union_node *un; 1045 struct thread *td; 1046{ 1047 struct vnode *vp; 1048 struct ucred *cred = td->td_ucred; 1049 struct vattr vat; 1050 struct vattr *vap = &vat; 1051 int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL); 1052 int error; 1053 int cmode; 1054 struct componentname cn; 1055 1056 *vpp = NULLVP; 1057 FILEDESC_LOCK(td->td_proc->p_fd); 1058 cmode = UN_FILEMODE & ~td->td_proc->p_fd->fd_cmask; 1059 FILEDESC_UNLOCK(td->td_proc->p_fd); 1060 1061 /* 1062 * Build a new componentname structure (for the same 1063 * reasons outlines in union_mkshadow). 1064 * The difference here is that the file is owned by 1065 * the current user, rather than by the person who 1066 * did the mount, since the current user needs to be 1067 * able to write the file (that's why it is being 1068 * copied in the first place). 1069 */ 1070 cn.cn_namelen = strlen(un->un_path); 1071 cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 1072 bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); 1073 cn.cn_nameiop = CREATE; 1074 cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); 1075 cn.cn_thread = td; 1076 cn.cn_cred = td->td_ucred; 1077 cn.cn_nameptr = cn.cn_pnbuf; 1078 cn.cn_consume = 0; 1079 1080 /* 1081 * Pass dvp unlocked and referenced on call to relookup(). 1082 * 1083 * If an error occurs, dvp will be returned unlocked and dereferenced. 1084 */ 1085 VREF(un->un_dirvp); 1086 error = relookup(un->un_dirvp, &vp, &cn); 1087 if (error) 1088 return (error); 1089 1090 /* 1091 * If no error occurs, dvp will be returned locked with the reference 1092 * left as before, and vpp will be returned referenced and locked. 
1093 */ 1094 if (vp) { 1095 vput(un->un_dirvp); 1096 if (cn.cn_flags & HASBUF) { 1097 uma_zfree(namei_zone, cn.cn_pnbuf); 1098 cn.cn_flags &= ~HASBUF; 1099 } 1100 if (vp == un->un_dirvp) 1101 vrele(vp); 1102 else 1103 vput(vp); 1104 return (EEXIST); 1105 } 1106 1107 /* 1108 * Good - there was no race to create the file 1109 * so go ahead and create it. The permissions 1110 * on the file will be 0666 modified by the 1111 * current user's umask. Access to the file, while 1112 * it is unioned, will require access to the top *and* 1113 * bottom files. Access when not unioned will simply 1114 * require access to the top-level file. 1115 * TODO: confirm choice of access permissions. 1116 */ 1117 VATTR_NULL(vap); 1118 vap->va_type = VREG; 1119 vap->va_mode = cmode; 1120 VOP_LEASE(un->un_dirvp, td, cred, LEASE_WRITE); 1121 error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap); 1122 if (cn.cn_flags & HASBUF) { 1123 uma_zfree(namei_zone, cn.cn_pnbuf); 1124 cn.cn_flags &= ~HASBUF; 1125 } 1126 vput(un->un_dirvp); 1127 if (error) 1128 return (error); 1129 1130 error = VOP_OPEN(vp, fmode, cred, td); 1131 if (error == 0 && vn_canvmio(vp) == TRUE) 1132 error = vfs_object_create(vp, td, cred); 1133 if (error) { 1134 vput(vp); 1135 return (error); 1136 } 1137 vp->v_writecount++; 1138 *vpp = vp; 1139 return (0); 1140} 1141 1142static int 1143union_vn_close(vp, fmode, cred, td) 1144 struct vnode *vp; 1145 int fmode; 1146 struct ucred *cred; 1147 struct thread *td; 1148{ 1149 1150 if (fmode & FWRITE) 1151 --vp->v_writecount; 1152 return (VOP_CLOSE(vp, fmode, cred, td)); 1153} 1154 1155#if 0 1156 1157/* 1158 * union_removed_upper: 1159 * 1160 * called with union_node unlocked. XXX 1161 */ 1162 1163void 1164union_removed_upper(un) 1165 struct union_node *un; 1166{ 1167 struct thread *td = curthread; /* XXX */ 1168 struct vnode **vpp; 1169 1170 /* 1171 * Do not set the uppervp to NULLVP. If lowervp is NULLVP, 1172 * union node will have neither uppervp nor lowervp. 
We remove 1173 * the union node from cache, so that it will not be referrenced. 1174 */ 1175 union_newupper(un, NULLVP); 1176 if (un->un_dircache != 0) { 1177 for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) 1178 vrele(*vpp); 1179 free(un->un_dircache, M_TEMP); 1180 un->un_dircache = 0; 1181 } 1182 1183 if (un->un_flags & UN_CACHED) { 1184 un->un_flags &= ~UN_CACHED; 1185 LIST_REMOVE(un, un_cache); 1186 } 1187} 1188 1189#endif 1190 1191/* 1192 * determine whether a whiteout is needed 1193 * during a remove/rmdir operation. 1194 */ 1195int 1196union_dowhiteout(un, cred, td) 1197 struct union_node *un; 1198 struct ucred *cred; 1199 struct thread *td; 1200{ 1201 struct vattr va; 1202 1203 if (un->un_lowervp != NULLVP) 1204 return (1); 1205 1206 if (VOP_GETATTR(un->un_uppervp, &va, cred, td) == 0 && 1207 (va.va_flags & OPAQUE)) 1208 return (1); 1209 1210 return (0); 1211} 1212 1213static void 1214union_dircache_r(vp, vppp, cntp) 1215 struct vnode *vp; 1216 struct vnode ***vppp; 1217 int *cntp; 1218{ 1219 struct union_node *un; 1220 1221 if (vp->v_op != union_vnodeop_p) { 1222 if (vppp) { 1223 VREF(vp); 1224 *(*vppp)++ = vp; 1225 if (--(*cntp) == 0) 1226 panic("union: dircache table too small"); 1227 } else { 1228 (*cntp)++; 1229 } 1230 1231 return; 1232 } 1233 1234 un = VTOUNION(vp); 1235 if (un->un_uppervp != NULLVP) 1236 union_dircache_r(un->un_uppervp, vppp, cntp); 1237 if (un->un_lowervp != NULLVP) 1238 union_dircache_r(un->un_lowervp, vppp, cntp); 1239} 1240 1241struct vnode * 1242union_dircache(vp, td) 1243 struct vnode *vp; 1244 struct thread *td; 1245{ 1246 int cnt; 1247 struct vnode *nvp; 1248 struct vnode **vpp; 1249 struct vnode **dircache; 1250 struct union_node *un; 1251 int error; 1252 1253 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1254 dircache = VTOUNION(vp)->un_dircache; 1255 1256 nvp = NULLVP; 1257 1258 if (dircache == NULL) { 1259 cnt = 0; 1260 union_dircache_r(vp, 0, &cnt); 1261 cnt++; 1262 dircache = malloc(cnt * sizeof(struct vnode *), 1263 
M_TEMP, M_WAITOK); 1264 vpp = dircache; 1265 union_dircache_r(vp, &vpp, &cnt); 1266 *vpp = NULLVP; 1267 vpp = dircache + 1; 1268 } else { 1269 vpp = dircache; 1270 do { 1271 if (*vpp++ == VTOUNION(vp)->un_uppervp) 1272 break; 1273 } while (*vpp != NULLVP); 1274 } 1275 1276 if (*vpp == NULLVP) 1277 goto out; 1278 1279 /*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);*/ 1280 UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? (*vpp)->v_usecount : -99))); 1281 VREF(*vpp); 1282 error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0); 1283 UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? (*vpp)->v_usecount : -99))); 1284 if (error) 1285 goto out; 1286 1287 VTOUNION(vp)->un_dircache = 0; 1288 un = VTOUNION(nvp); 1289 un->un_dircache = dircache; 1290 1291out: 1292 VOP_UNLOCK(vp, 0, td); 1293 return (nvp); 1294} 1295 1296/* 1297 * Module glue to remove #ifdef UNION from vfs_syscalls.c 1298 */ 1299static int 1300union_dircheck(struct thread *td, struct vnode **vp, struct file *fp) 1301{ 1302 int error = 0; 1303 1304 if ((*vp)->v_op == union_vnodeop_p) { 1305 struct vnode *lvp; 1306 1307 lvp = union_dircache(*vp, td); 1308 if (lvp != NULLVP) { 1309 struct vattr va; 1310 1311 /* 1312 * If the directory is opaque, 1313 * then don't show lower entries 1314 */ 1315 error = VOP_GETATTR(*vp, &va, fp->f_cred, td); 1316 if (va.va_flags & OPAQUE) { 1317 vput(lvp); 1318 lvp = NULL; 1319 } 1320 } 1321 1322 if (lvp != NULLVP) { 1323 error = VOP_OPEN(lvp, FREAD, fp->f_cred, td); 1324 if (error == 0 && vn_canvmio(lvp) == TRUE) 1325 error = vfs_object_create(lvp, td, fp->f_cred); 1326 if (error) { 1327 vput(lvp); 1328 return (error); 1329 } 1330 VOP_UNLOCK(lvp, 0, td); 1331 FILE_LOCK(fp); 1332 fp->f_data = (caddr_t) lvp; 1333 fp->f_offset = 0; 1334 FILE_UNLOCK(fp); 1335 error = vn_close(*vp, FREAD, fp->f_cred, td); 1336 if (error) 1337 return (error); 1338 *vp = lvp; 1339 return -1; /* goto unionread */ 1340 } 1341 } 1342 return error; 1343} 1344 1345static int 
1346union_modevent(module_t mod, int type, void *data) 1347{ 1348 switch (type) { 1349 case MOD_LOAD: 1350 union_dircheckp = union_dircheck; 1351 break; 1352 case MOD_UNLOAD: 1353 union_dircheckp = NULL; 1354 break; 1355 default: 1356 break; 1357 } 1358 return 0; 1359} 1360 1361static moduledata_t union_mod = { 1362 "union_dircheck", 1363 union_modevent, 1364 NULL 1365}; 1366 1367DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY);
|