#include <miscfs/union/union.h>

#include <sys/proc.h>

extern int union_init __P((void));

/* must be power of two, otherwise change UNION_HASH() */
#define NHASH 32

/* unsigned int ... */
#define UNION_HASH(u, l) \
	(((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1))

/*
 * Illustrative example (the addresses are made up): with NHASH == 32,
 * vnode addresses 0xc10a2400 and 0xc10b7800 sum to 0x82159c00 (mod 2^32);
 * ">> 8" discards the low-order bits common to most allocations and
 * "& (NHASH-1)" keeps five bits, so the pair hashes to
 * (0x82159c00 >> 8) & 31 == 28, i.e. hash chain 28.
 */

static LIST_HEAD(unhead, union_node) unhead[NHASH];
static int unvplock[NHASH];

static void union_dircache_r __P((struct vnode *vp, struct vnode ***vppp,
		int *cntp));
static int union_list_lock __P((int ix));
static void union_list_unlock __P((int ix));
static int union_relookup __P((struct union_mount *um, struct vnode *dvp,
		struct vnode **vpp,
		struct componentname *cnp,
		struct componentname *cn, char *path,
		int pathlen));
static void union_updatevp __P((struct union_node *un,
		struct vnode *uppervp,
		struct vnode *lowervp));
static void union_newlower __P((struct union_node *, struct vnode *));
static void union_newupper __P((struct union_node *, struct vnode *));
static int union_copyfile __P((struct vnode *, struct vnode *,
		struct ucred *, struct proc *));
static int union_vn_create __P((struct vnode **, struct union_node *,
		struct proc *));
static int union_vn_close __P((struct vnode *, int, struct ucred *,
		struct proc *));

int
union_init()
{
	int i;

	for (i = 0; i < NHASH; i++)
		LIST_INIT(&unhead[i]);
	bzero((caddr_t) unvplock, sizeof(unvplock));
	return (0);
}

/*
 * Lock hash chain (ix).  Returns non-zero if the chain was already
 * locked, in which case this routine has slept and the caller must
 * retry:
 *
 *	while (union_list_lock(hash))
 *		continue;
 */
static int
union_list_lock(ix)
	int ix;
{

	if (unvplock[ix] & UN_LOCKED) {
		unvplock[ix] |= UN_WANT;
		(void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0);
		return (1);
	}

	unvplock[ix] |= UN_LOCKED;

	return (0);
}

/*
 * Unlock hash chain (ix) and wake up any sleeping waiters.
 */
static void
union_list_unlock(ix)
	int ix;
{

	unvplock[ix] &= ~UN_LOCKED;

	if (unvplock[ix] & UN_WANT) {
		unvplock[ix] &= ~UN_WANT;
		wakeup((caddr_t) &unvplock[ix]);
	}
}
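
/*
 * Replace the upper and/or lower vnode of (un) with (uppervp)/(lowervp),
 * releasing any references previously held, invalidating the cached
 * sizes, and moving the node to its new hash chain.  Dropping the lower
 * vnode also drops the saved pathname and directory vnode kept for
 * union_vn_create().  Called via union_newlower() and union_newupper().
 */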
static void
union_updatevp(un, uppervp, lowervp)
	struct union_node *un;
	struct vnode *uppervp;
	struct vnode *lowervp;
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash)
		while (union_list_lock(lhash))
			continue;

	while (union_list_lock(uhash))
		continue;

	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vrele(un->un_lowervp);
			if (un->un_path) {
				free(un->un_path, M_TEMP);
				un->un_path = 0;
			}
			if (un->un_dirvp) {
				vrele(un->un_dirvp);
				un->un_dirvp = NULLVP;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vrele(un->un_uppervp);

		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;
	}

	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}

static void
union_newlower(un, lowervp)
	struct union_node *un;
	struct vnode *lowervp;
{

	union_updatevp(un, un->un_uppervp, lowervp);
}

static void
union_newupper(un, uppervp)
	struct union_node *un;
	struct vnode *uppervp;
{

	union_updatevp(un, uppervp, un->un_lowervp);
}

/*
 * Keep track of size changes in the underlying vnodes.
 * If the size changes, then callback to the vm layer
 * giving priority to the upper layer size.
 */
void
union_newsize(vp, uppersz, lowersz)
	struct vnode *vp;
	off_t uppersz, lowersz;
{
	struct union_node *un;
	off_t sz;

	/* only interested in regular files */
	if (vp->v_type != VREG)
		return;

	un = VTOUNION(vp);
	sz = VNOVAL;

	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
		un->un_uppersz = uppersz;
		if (sz == VNOVAL)
			sz = un->un_uppersz;
	}

	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
		un->un_lowersz = lowersz;
		if (sz == VNOVAL)
			sz = un->un_lowersz;
	}

	if (sz != VNOVAL) {
#ifdef UNION_DIAGNOSTIC
		printf("union: %s size now %ld\n",
			uppersz != VNOVAL ? "upper" : "lower", (long) sz);
#endif
		vnode_pager_setsize(vp, sz);
	}
}
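
/*
 * Illustrative call pattern for union_allocvp() (a sketch, not code
 * from this file): a lookup routine which has resolved upper and/or
 * lower layer vnodes would typically do something like
 *
 *	error = union_allocvp(&vp, mp, undvp, dvp, cnp,
 *			uppervp, lowervp, 1);
 *
 * passing (docache == 0) only for transient nodes such as the ones
 * built by union_dircache() below.
 */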

/*
 * allocate a union_node/vnode pair. the vnode is
 * referenced and locked. the new vnode is returned
 * via (vpp). (mp) is the mountpoint of the union filesystem,
 * (dvp) is the parent directory where the upper layer object
 * should exist (but doesn't) and (cnp) is the componentname
 * information which is partially copied to allow the upper
 * layer object to be created at a later time. (uppervp)
 * and (lowervp) reference the upper and lower layer objects
 * being mapped. either, but not both, can be nil.
 * if supplied, (uppervp) is locked.
 * the references are either maintained in the new union_node
 * object which is allocated, or they are vrele'd.
 *
 * all union_nodes are maintained on a singly-linked
 * list. new nodes are only allocated when they cannot
 * be found on this list. entries on the list are
 * removed when the vfs reclaim entry is called.
 *
 * a single lock is kept for the entire list. this is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode. this lock is only taken if we are going to
 * call getnewvnode, since the kernel itself is single-threaded.
 *
 * if an entry is found on the list, then call vget() to
 * take a reference. this is done because there may be
 * zero references to it and so it needs to be removed from
 * the vnode free list.
 */
int
union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *undvp;		/* parent union vnode */
	struct vnode *dvp;		/* may be null */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
{
	int error;
	struct union_node *un = 0;
	struct vnode *xlowervp = NULLVP;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	int hash;
	int vflag;
	int try;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		xlowervp = lowervp;
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	vflag = 0;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				VREF(lowervp);
		}
		vflag = VROOT;
	}

loop:
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		for (un = unhead[hash].lh_first; un != 0;
				un = un->un_cache.le_next) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				if (vget(UNIONTOV(un), 0,
				    cnp ? cnp->cn_proc : NULL)) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.
		 * uppervp is locked, though un->un_uppervp
		 * may not be. this doesn't break the locking
		 * hierarchy since in the case that un->un_uppervp
		 * is not yet locked it will be vrele'd and replaced
		 * with uppervp.
		 */

		if ((dvp != NULLVP) && (uppervp == dvp)) {
			/*
			 * Access ``.'', so (un) will already
			 * be locked. Since this process has
			 * the lock on (uppervp) no other
			 * process can hold the lock on (un).
			 */
#ifdef DIAGNOSTIC
			if ((un->un_flags & UN_LOCKED) == 0)
				panic("union: . not locked");
			else if (curproc && un->un_pid != curproc->p_pid &&
				    un->un_pid > -1 && curproc->p_pid > -1)
				panic("union: allocvp not lock owner");
#endif
		} else {
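			/*
			 * Someone else holds the union_node lock:
			 * drop our reference, note that we want the
			 * lock, and sleep until it is released, then
			 * retry the whole lookup from scratch.
			 */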
			if (un->un_flags & UN_LOCKED) {
				vrele(UNIONTOV(un));
				un->un_flags |= UN_WANT;
				(void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0);
				goto loop;
			}
			un->un_flags |= UN_LOCKED;

#ifdef DIAGNOSTIC
			if (curproc)
				un->un_pid = curproc->p_pid;
			else
				un->un_pid = -1;
#endif
		}

		/*
		 * At this point, the union_node is locked,
		 * un->un_uppervp may not be locked, and uppervp
		 * is locked or nil.
		 */

		/*
		 * Save information about the upper layer.
		 */
		if (uppervp != un->un_uppervp) {
			union_newupper(un, uppervp);
		} else if (uppervp) {
			vrele(uppervp);
		}

		if (un->un_uppervp) {
			un->un_flags |= UN_ULOCK;
			un->un_flags &= ~UN_KLOCK;
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_hash = cnp->cn_hash;
				un->un_path = malloc(cnp->cn_namelen+1,
						M_TEMP, M_WAITOK);
				bcopy(cnp->cn_nameptr, un->un_path,
						cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
				VREF(dvp);
				un->un_dirvp = dvp;
			}
		} else if (lowervp) {
			vrele(lowervp);
		}
		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * otherwise lock the vp list while we call getnewvnode
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
	if (error) {
		if (uppervp) {
			if (dvp == uppervp)
				vrele(uppervp);
			else
				vput(uppervp);
		}
		if (lowervp)
			vrele(lowervp);

		goto out;
	}

	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
		M_TEMP, M_WAITOK);

	(*vpp)->v_flag |= vflag;
	if (uppervp)
		(*vpp)->v_type = uppervp->v_type;
	else
		(*vpp)->v_type = lowervp->v_type;
	un = VTOUNION(*vpp);
	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_pvp = undvp;
	if (undvp != NULLVP)
		VREF(undvp);
	un->un_dircache = 0;
	un->un_openl = 0;
	un->un_flags = UN_LOCKED;
	if (un->un_uppervp)
		un->un_flags |= UN_ULOCK;
#ifdef DIAGNOSTIC
	if (curproc)
		un->un_pid = curproc->p_pid;
	else
		un->un_pid = -1;
#endif
	if (cnp && (lowervp != NULLVP)) {
		un->un_hash = cnp->cn_hash;
		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
		VREF(dvp);
		un->un_dirvp = dvp;
	} else {
		un->un_hash = 0;
		un->un_path = 0;
		un->un_dirvp = 0;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	if (xlowervp)
		vrele(xlowervp);

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}
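
/*
 * Dispose of a union node (the vfs reclaim path ends up here):
 * unhook it from its hash chain if still cached, drop the references
 * it holds on its parent, its upper and lower layer vnodes and its
 * saved directory, free the saved pathname, and release the
 * per-vnode private data.
 */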
int
union_freevp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_pvp != NULLVP)
		vrele(un->un_pvp);
	if (un->un_uppervp != NULLVP)
		vrele(un->un_uppervp);
	if (un->un_lowervp != NULLVP)
		vrele(un->un_lowervp);
	if (un->un_dirvp != NULLVP)
		vrele(un->un_dirvp);
	if (un->un_path)
		free(un->un_path, M_TEMP);

	FREE(vp->v_data, M_TEMP);
	vp->v_data = 0;

	return (0);
}

/*
 * copyfile. copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes. both (fvp)
 * and (tvp) are locked on entry and exit.
 */
static int
union_copyfile(fvp, tvp, cred, p)
	struct vnode *fvp;
	struct vnode *tvp;
	struct ucred *cred;
	struct proc *p;
{
	char *buf;
	struct uio uio;
	struct iovec iov;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXBSIZE.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	uio.uio_procp = p;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	VOP_UNLOCK(fvp, 0, p);				/* XXX */
	VOP_LEASE(fvp, p, cred, LEASE_READ);
	vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
	VOP_UNLOCK(tvp, 0, p);				/* XXX */
	VOP_LEASE(tvp, p, cred, LEASE_WRITE);
	vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;
		error = VOP_READ(fvp, &uio, 0, cred);

		if (error == 0) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf;
			iov.iov_len = MAXBSIZE - uio.uio_resid;
			uio.uio_offset = offset;
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;

			if (uio.uio_resid == 0)
				break;

			do {
				error = VOP_WRITE(tvp, &uio, 0, cred);
			} while ((uio.uio_resid > 0) && (error == 0));
		}

	} while (error == 0);

	free(buf, M_TEMP);
	return (error);
}
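
/*
 * union_copyup() migrates the file behind (un) from the lower to the
 * upper layer: it creates the shadow file with union_vn_create(),
 * optionally copies the data across with union_copyfile(), and then
 * re-plays any opens held against the lower vnode onto the new upper
 * vnode so that the per-layer open counts stay balanced.
 */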
/*
 * (un) is assumed to be locked on entry and remains
 * locked on exit.
 */
int
union_copyup(un, docopy, cred, p)
	struct union_node *un;
	int docopy;
	struct ucred *cred;
	struct proc *p;
{
	int error;
	struct vnode *lvp, *uvp;

	/*
	 * If the user does not have read permission, the vnode should not
	 * be copied to the upper layer.
	 */
	vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, p);
	error = VOP_ACCESS(un->un_lowervp, VREAD, cred, p);
	VOP_UNLOCK(un->un_lowervp, 0, p);
	if (error)
		return (error);

	error = union_vn_create(&uvp, un, p);
	if (error)
		return (error);

	/* at this point, uppervp is locked */
	union_newupper(un, uvp);
	un->un_flags |= UN_ULOCK;

	lvp = un->un_lowervp;

	if (docopy) {
		/*
		 * XXX - should not ignore errors
		 * from VOP_CLOSE
		 */
		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p);
		error = VOP_OPEN(lvp, FREAD, cred, p);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, p);
			VOP_UNLOCK(lvp, 0, p);
			(void) VOP_CLOSE(lvp, FREAD, cred, p);
		}
#ifdef UNION_DIAGNOSTIC
		if (error == 0)
			uprintf("union: copied up %s\n", un->un_path);
#endif

	}
	un->un_flags &= ~UN_ULOCK;
	VOP_UNLOCK(uvp, 0, p);
	union_vn_close(uvp, FWRITE, cred, p);
	vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p);
	un->un_flags |= UN_ULOCK;

	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its reference counts right.  This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		for (i = 0; i < un->un_openl; i++) {
			(void) VOP_CLOSE(lvp, FREAD, cred, p);
			(void) VOP_OPEN(uvp, FREAD, cred, p);
		}
		un->un_openl = 0;
	}

	return (error);
}

static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
	struct union_mount *um;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct componentname *cn;
	char *path;
	int pathlen;
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for.  This must duplicate
	 * some of the work done by NDINIT, some of the work done
	 * by namei, some of the work done by lookup and some of
	 * the work done by VOP_LOOKUP when given a CREATE flag.
	 * Conclusion: Horrible.
	 *
	 * The pathname buffer will be FREEed by VOP_MKDIR.
	 */
	cn->cn_namelen = pathlen;
	cn->cn_pnbuf = zalloc(namei_zone);
	bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
	cn->cn_pnbuf[cn->cn_namelen] = '\0';

	cn->cn_nameiop = CREATE;
	cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn->cn_proc = cnp->cn_proc;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_hash = cnp->cn_hash;
	cn->cn_consume = cnp->cn_consume;

	VREF(dvp);
	error = relookup(dvp, vpp, cn);
	if (!error)
		vrele(dvp);
	else {
		zfree(namei_zone, cn->cn_pnbuf);
		cn->cn_pnbuf = NULL;
	}

	return (error);
}

/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory.
 * it is unlocked on entry and exit.
 * (cnp) is the componentname to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked.
 */
int
union_mkshadow(um, dvp, cnp, vpp)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	struct vnode **vpp;
{
	int error;
	struct vattr va;
	struct proc *p = cnp->cn_proc;
	struct componentname cn;

	error = union_relookup(um, dvp, vpp, cnp, &cn,
			cnp->cn_nameptr, cnp->cn_namelen);
	if (error)
		return (error);

	if (*vpp) {
		VOP_ABORTOP(dvp, &cn);
		VOP_UNLOCK(dvp, 0, p);
		vrele(*vpp);
		*vpp = NULLVP;
		return (EEXIST);
	}

	/*
	 * policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (i.e. mostly identical to the
	 * mkdir syscall). (jsp, kb)
	 */

	VATTR_NULL(&va);
	va.va_type = VDIR;
	va.va_mode = um->um_cmode;

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE);

	error = VOP_MKDIR(dvp, vpp, &cn, &va);
	return (error);
}

/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * it is locked on entry and exit.
 * (cnp) is the componentname to be created.
 */
int
union_mkwhiteout(um, dvp, cnp, path)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	char *path;
{
	int error;
	struct proc *p = cnp->cn_proc;
	struct vnode *wvp;
	struct componentname cn;

	VOP_UNLOCK(dvp, 0, p);
	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
	if (error) {
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
		return (error);
	}

	if (wvp) {
		VOP_ABORTOP(dvp, &cn);
		vrele(dvp);
		vrele(wvp);
		return (EEXIST);
	}

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE);

	error = VOP_WHITEOUT(dvp, &cn, CREATE);
	if (error)
		VOP_ABORTOP(dvp, &cn);

	vrele(dvp);

	return (error);
}
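
/*
 * union_vn_create() is invoked from union_copyup() above: the vnode
 * it returns is locked and open for writing, and v_writecount has
 * been bumped; union_vn_close() below is the matching teardown.
 */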
/*
 * union_vn_create: creates and opens a new shadow file
 * on the upper union layer. this function is similar
 * in spirit to calling vn_open but it avoids calling namei().
 * the problem with calling namei is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas relookup is told where to start.
 */
static int
union_vn_create(vpp, un, p)
	struct vnode **vpp;
	struct union_node *un;
	struct proc *p;
{
	struct vnode *vp;
	struct ucred *cred = p->p_ucred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
	int error;
	int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
	struct componentname cn;

	*vpp = NULLVP;

	/*
	 * Build a new componentname structure (for the same
	 * reasons outlined in union_mkshadow).
	 * The difference here is that the file is owned by
	 * the current user, rather than by the person who
	 * did the mount, since the current user needs to be
	 * able to write the file (that's why it is being
	 * copied in the first place).
	 */
	cn.cn_namelen = strlen(un->un_path);
	cn.cn_pnbuf = zalloc(namei_zone);
	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn.cn_proc = p;
	cn.cn_cred = p->p_ucred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_hash = un->un_hash;
	cn.cn_consume = 0;

	VREF(un->un_dirvp);
	error = relookup(un->un_dirvp, &vp, &cn);
	if (error)
		return (error);
	vrele(un->un_dirvp);

	if (vp) {
		VOP_ABORTOP(un->un_dirvp, &cn);
		if (un->un_dirvp == vp)
			vrele(un->un_dirvp);
		else
			vput(un->un_dirvp);
		vrele(vp);
		return (EEXIST);
	}

	/*
	 * Good - there was no race to create the file
	 * so go ahead and create it. The permissions
	 * on the file will be 0666 modified by the
	 * current user's umask.  Access to the file, while
	 * it is unioned, will require access to the top *and*
	 * bottom files. Access when not unioned will simply
	 * require access to the top-level file.
	 * TODO: confirm choice of access permissions.
	 */
	VATTR_NULL(vap);
	vap->va_type = VREG;
	vap->va_mode = cmode;
	VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE);
	if (error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap))
		return (error);

	error = VOP_OPEN(vp, fmode, cred, p);
	if (error) {
		vput(vp);
		return (error);
	}

	vp->v_writecount++;
	*vpp = vp;
	return (0);
}

/*
 * Undo the open done by union_vn_create(): drop the writecount
 * and pass the close down to the upper layer filesystem.
 */
static int
union_vn_close(vp, fmode, cred, p)
	struct vnode *vp;
	int fmode;
	struct ucred *cred;
	struct proc *p;
{

	if (fmode & FWRITE)
		--vp->v_writecount;
	return (VOP_CLOSE(vp, fmode, cred, p));
}

void
union_removed_upper(un)
	struct union_node *un;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode **vpp;

	/*
	 * Do not set the uppervp to NULLVP.  If lowervp is NULLVP, the
	 * union node will have neither uppervp nor lowervp.  We remove
	 * the union node from cache, so that it will not be referenced.
	 */
#if 0
	union_newupper(un, NULLVP);
#endif
	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_flags & UN_ULOCK) {
		un->un_flags &= ~UN_ULOCK;
		VOP_UNLOCK(un->un_uppervp, 0, p);
	}
}

#if 0
struct vnode *
union_lowervp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if ((un->un_lowervp != NULLVP) &&
	    (vp->v_type == un->un_lowervp->v_type)) {
		if (vget(un->un_lowervp, 0) == 0)
			return (un->un_lowervp);
	}

	return (NULLVP);
}
#endif

/*
 * determine whether a whiteout is needed
 * during a remove/rmdir operation.
 */
int
union_dowhiteout(un, cred, p)
	struct union_node *un;
	struct ucred *cred;
	struct proc *p;
{
	struct vattr va;

	if (un->un_lowervp != NULLVP)
		return (1);

	if (VOP_GETATTR(un->un_uppervp, &va, cred, p) == 0 &&
	    (va.va_flags & OPAQUE))
		return (1);

	return (0);
}
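
/*
 * union_dircache_r() walks the stack of vnodes behind a union
 * directory, collecting the constituent (non-union) directory
 * vnodes into the caller's array, or, when no array is supplied,
 * just counting them.  union_dircache() uses it to build a
 * NULLVP-terminated array of referenced directory vnodes for (vp);
 * each subsequent call hands back a union vnode wrapping the next
 * layer in that array, so a caller can visit the layers of a union
 * directory one at a time.
 */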
static void
union_dircache_r(vp, vppp, cntp)
	struct vnode *vp;
	struct vnode ***vppp;
	int *cntp;
{
	struct union_node *un;

	if (vp->v_op != union_vnodeop_p) {
		if (vppp) {
			VREF(vp);
			*(*vppp)++ = vp;
			if (--(*cntp) == 0)
				panic("union: dircache table too small");
		} else {
			(*cntp)++;
		}

		return;
	}

	un = VTOUNION(vp);
	if (un->un_uppervp != NULLVP)
		union_dircache_r(un->un_uppervp, vppp, cntp);
	if (un->un_lowervp != NULLVP)
		union_dircache_r(un->un_lowervp, vppp, cntp);
}

struct vnode *
union_dircache(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int cnt;
	struct vnode *nvp;
	struct vnode **vpp;
	struct vnode **dircache;
	struct union_node *un;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	dircache = VTOUNION(vp)->un_dircache;

	nvp = NULLVP;

	if (dircache == 0) {
		cnt = 0;
		union_dircache_r(vp, 0, &cnt);
		cnt++;
		dircache = (struct vnode **)
				malloc(cnt * sizeof(struct vnode *),
					M_TEMP, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &cnt);
		*vpp = NULLVP;
		vpp = dircache + 1;
	} else {
		vpp = dircache;
		do {
			if (*vpp++ == VTOUNION(vp)->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);
	VREF(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0);
	if (error)
		goto out;

	VTOUNION(vp)->un_dircache = 0;
	un = VTOUNION(nvp);
	un->un_dircache = dircache;

out:
	VOP_UNLOCK(vp, 0, p);
	return (nvp);
}