ufs_vnops.c revision 233875
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_vnops.c 233875 2012-04-04 14:50:21Z jh $"); 39 40#include "opt_quota.h" 41#include "opt_suiddir.h" 42#include "opt_ufs.h" 43#include "opt_ffs.h" 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/malloc.h> 48#include <sys/namei.h> 49#include <sys/kernel.h> 50#include <sys/fcntl.h> 51#include <sys/stat.h> 52#include <sys/bio.h> 53#include <sys/buf.h> 54#include <sys/mount.h> 55#include <sys/priv.h> 56#include <sys/refcount.h> 57#include <sys/unistd.h> 58#include <sys/vnode.h> 59#include <sys/dirent.h> 60#include <sys/lockf.h> 61#include <sys/conf.h> 62#include <sys/acl.h> 63 64#include <security/mac/mac_framework.h> 65 66#include <sys/file.h> /* XXX */ 67 68#include <vm/vm.h> 69#include <vm/vm_extern.h> 70 71#include <ufs/ufs/acl.h> 72#include <ufs/ufs/extattr.h> 73#include <ufs/ufs/quota.h> 74#include <ufs/ufs/inode.h> 75#include <ufs/ufs/dir.h> 76#include <ufs/ufs/ufsmount.h> 77#include <ufs/ufs/ufs_extern.h> 78#ifdef UFS_DIRHASH 79#include <ufs/ufs/dirhash.h> 80#endif 81#ifdef UFS_GJOURNAL 82#include <ufs/ufs/gjournal.h> 83FEATURE(ufs_gjournal, "Journaling support through GEOM for UFS"); 84#endif 85 86#ifdef QUOTA 87FEATURE(ufs_quota, "UFS disk quotas support"); 88FEATURE(ufs_quota64, "64bit UFS disk quotas support"); 89#endif 90 91#ifdef SUIDDIR 92FEATURE(suiddir, 93 "Give all new files in directory the same ownership as the directory"); 94#endif 95 96 97#include <ufs/ffs/ffs_extern.h> 98 99static vop_accessx_t ufs_accessx; 100static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *); 101static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *); 102static vop_close_t ufs_close; 103static vop_create_t ufs_create; 104static vop_getattr_t ufs_getattr; 105static vop_link_t ufs_link; 106static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); 
107static vop_markatime_t ufs_markatime; 108static vop_mkdir_t ufs_mkdir; 109static vop_mknod_t ufs_mknod; 110static vop_open_t ufs_open; 111static vop_pathconf_t ufs_pathconf; 112static vop_print_t ufs_print; 113static vop_readlink_t ufs_readlink; 114static vop_remove_t ufs_remove; 115static vop_rename_t ufs_rename; 116static vop_rmdir_t ufs_rmdir; 117static vop_setattr_t ufs_setattr; 118static vop_strategy_t ufs_strategy; 119static vop_symlink_t ufs_symlink; 120static vop_whiteout_t ufs_whiteout; 121static vop_close_t ufsfifo_close; 122static vop_kqfilter_t ufsfifo_kqfilter; 123static vop_pathconf_t ufsfifo_pathconf; 124 125SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); 126 127/* 128 * A virgin directory (no blushing please). 129 */ 130static struct dirtemplate mastertemplate = { 131 0, 12, DT_DIR, 1, ".", 132 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." 133}; 134static struct odirtemplate omastertemplate = { 135 0, 12, 1, ".", 136 0, DIRBLKSIZ - 12, 2, ".." 137}; 138 139static void 140ufs_itimes_locked(struct vnode *vp) 141{ 142 struct inode *ip; 143 struct timespec ts; 144 145 ASSERT_VI_LOCKED(vp, __func__); 146 147 ip = VTOI(vp); 148 if (UFS_RDONLY(ip)) 149 goto out; 150 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) 151 return; 152 153 if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp)) 154 ip->i_flag |= IN_LAZYMOD; 155 else if (((vp->v_mount->mnt_kern_flag & 156 (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || 157 (ip->i_flag & (IN_CHANGE | IN_UPDATE))) 158 ip->i_flag |= IN_MODIFIED; 159 else if (ip->i_flag & IN_ACCESS) 160 ip->i_flag |= IN_LAZYACCESS; 161 vfs_timestamp(&ts); 162 if (ip->i_flag & IN_ACCESS) { 163 DIP_SET(ip, i_atime, ts.tv_sec); 164 DIP_SET(ip, i_atimensec, ts.tv_nsec); 165 } 166 if (ip->i_flag & IN_UPDATE) { 167 DIP_SET(ip, i_mtime, ts.tv_sec); 168 DIP_SET(ip, i_mtimensec, ts.tv_nsec); 169 } 170 if (ip->i_flag & IN_CHANGE) { 171 DIP_SET(ip, i_ctime, ts.tv_sec); 172 DIP_SET(ip, i_ctimensec, ts.tv_nsec); 173 
DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1); 174 } 175 176 out: 177 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); 178} 179 180void 181ufs_itimes(struct vnode *vp) 182{ 183 184 VI_LOCK(vp); 185 ufs_itimes_locked(vp); 186 VI_UNLOCK(vp); 187} 188 189/* 190 * Create a regular file 191 */ 192static int 193ufs_create(ap) 194 struct vop_create_args /* { 195 struct vnode *a_dvp; 196 struct vnode **a_vpp; 197 struct componentname *a_cnp; 198 struct vattr *a_vap; 199 } */ *ap; 200{ 201 int error; 202 203 error = 204 ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 205 ap->a_dvp, ap->a_vpp, ap->a_cnp); 206 if (error) 207 return (error); 208 return (0); 209} 210 211/* 212 * Mknod vnode call 213 */ 214/* ARGSUSED */ 215static int 216ufs_mknod(ap) 217 struct vop_mknod_args /* { 218 struct vnode *a_dvp; 219 struct vnode **a_vpp; 220 struct componentname *a_cnp; 221 struct vattr *a_vap; 222 } */ *ap; 223{ 224 struct vattr *vap = ap->a_vap; 225 struct vnode **vpp = ap->a_vpp; 226 struct inode *ip; 227 ino_t ino; 228 int error; 229 230 error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 231 ap->a_dvp, vpp, ap->a_cnp); 232 if (error) 233 return (error); 234 ip = VTOI(*vpp); 235 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 236 if (vap->va_rdev != VNOVAL) { 237 /* 238 * Want to be able to use this to make badblock 239 * inodes, so don't truncate the dev number. 240 */ 241 DIP_SET(ip, i_rdev, vap->va_rdev); 242 } 243 /* 244 * Remove inode, then reload it through VFS_VGET so it is 245 * checked to see if it is an alias of an existing entry in 246 * the inode cache. XXX I don't believe this is necessary now. 247 */ 248 (*vpp)->v_type = VNON; 249 ino = ip->i_number; /* Save this before vgone() invalidates ip. */ 250 vgone(*vpp); 251 vput(*vpp); 252 error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); 253 if (error) { 254 *vpp = NULL; 255 return (error); 256 } 257 return (0); 258} 259 260/* 261 * Open called. 
262 */ 263/* ARGSUSED */ 264static int 265ufs_open(struct vop_open_args *ap) 266{ 267 struct vnode *vp = ap->a_vp; 268 struct inode *ip; 269 270 if (vp->v_type == VCHR || vp->v_type == VBLK) 271 return (EOPNOTSUPP); 272 273 ip = VTOI(vp); 274 /* 275 * Files marked append-only must be opened for appending. 276 */ 277 if ((ip->i_flags & APPEND) && 278 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 279 return (EPERM); 280 vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td); 281 return (0); 282} 283 284/* 285 * Close called. 286 * 287 * Update the times on the inode. 288 */ 289/* ARGSUSED */ 290static int 291ufs_close(ap) 292 struct vop_close_args /* { 293 struct vnode *a_vp; 294 int a_fflag; 295 struct ucred *a_cred; 296 struct thread *a_td; 297 } */ *ap; 298{ 299 struct vnode *vp = ap->a_vp; 300 int usecount; 301 302 VI_LOCK(vp); 303 usecount = vp->v_usecount; 304 if (usecount > 1) 305 ufs_itimes_locked(vp); 306 VI_UNLOCK(vp); 307 return (0); 308} 309 310static int 311ufs_accessx(ap) 312 struct vop_accessx_args /* { 313 struct vnode *a_vp; 314 accmode_t a_accmode; 315 struct ucred *a_cred; 316 struct thread *a_td; 317 } */ *ap; 318{ 319 struct vnode *vp = ap->a_vp; 320 struct inode *ip = VTOI(vp); 321 accmode_t accmode = ap->a_accmode; 322 int error; 323#ifdef QUOTA 324 int relocked; 325#endif 326#ifdef UFS_ACL 327 struct acl *acl; 328 acl_type_t type; 329#endif 330 331 /* 332 * Disallow write attempts on read-only filesystems; 333 * unless the file is a socket, fifo, or a block or 334 * character device resident on the filesystem. 335 */ 336 if (accmode & VMODIFY_PERMS) { 337 switch (vp->v_type) { 338 case VDIR: 339 case VLNK: 340 case VREG: 341 if (vp->v_mount->mnt_flag & MNT_RDONLY) 342 return (EROFS); 343#ifdef QUOTA 344 /* 345 * Inode is accounted in the quotas only if struct 346 * dquot is attached to it. VOP_ACCESS() is called 347 * from vn_open_cred() and provides a convenient 348 * point to call getinoquota(). 
349 */ 350 if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { 351 352 /* 353 * Upgrade vnode lock, since getinoquota() 354 * requires exclusive lock to modify inode. 355 */ 356 relocked = 1; 357 vhold(vp); 358 vn_lock(vp, LK_UPGRADE | LK_RETRY); 359 VI_LOCK(vp); 360 if (vp->v_iflag & VI_DOOMED) { 361 vdropl(vp); 362 error = ENOENT; 363 goto relock; 364 } 365 vdropl(vp); 366 } else 367 relocked = 0; 368 error = getinoquota(ip); 369relock: 370 if (relocked) 371 vn_lock(vp, LK_DOWNGRADE | LK_RETRY); 372 if (error != 0) 373 return (error); 374#endif 375 break; 376 default: 377 break; 378 } 379 } 380 381 /* 382 * If immutable bit set, nobody gets to write it. "& ~VADMIN_PERMS" 383 * is here, because without it, * it would be impossible for the owner 384 * to remove the IMMUTABLE flag. 385 */ 386 if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) && 387 (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT))) 388 return (EPERM); 389 390#ifdef UFS_ACL 391 if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) { 392 if (vp->v_mount->mnt_flag & MNT_NFS4ACLS) 393 type = ACL_TYPE_NFS4; 394 else 395 type = ACL_TYPE_ACCESS; 396 397 acl = acl_alloc(M_WAITOK); 398 if (type == ACL_TYPE_NFS4) 399 error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td); 400 else 401 error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td); 402 switch (error) { 403 case 0: 404 if (type == ACL_TYPE_NFS4) { 405 error = vaccess_acl_nfs4(vp->v_type, ip->i_uid, 406 ip->i_gid, acl, accmode, ap->a_cred, NULL); 407 } else { 408 error = vfs_unixify_accmode(&accmode); 409 if (error == 0) 410 error = vaccess_acl_posix1e(vp->v_type, ip->i_uid, 411 ip->i_gid, acl, accmode, ap->a_cred, NULL); 412 } 413 break; 414 default: 415 if (error != EOPNOTSUPP) 416 printf( 417"ufs_accessx(): Error retrieving ACL on object (%d).\n", 418 error); 419 /* 420 * XXX: Fall back until debugged. Should 421 * eventually possibly log an error, and return 422 * EPERM for safety. 
423 */ 424 error = vfs_unixify_accmode(&accmode); 425 if (error == 0) 426 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, 427 ip->i_gid, accmode, ap->a_cred, NULL); 428 } 429 acl_free(acl); 430 431 return (error); 432 } 433#endif /* !UFS_ACL */ 434 error = vfs_unixify_accmode(&accmode); 435 if (error == 0) 436 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, 437 accmode, ap->a_cred, NULL); 438 return (error); 439} 440 441/* ARGSUSED */ 442static int 443ufs_getattr(ap) 444 struct vop_getattr_args /* { 445 struct vnode *a_vp; 446 struct vattr *a_vap; 447 struct ucred *a_cred; 448 } */ *ap; 449{ 450 struct vnode *vp = ap->a_vp; 451 struct inode *ip = VTOI(vp); 452 struct vattr *vap = ap->a_vap; 453 454 VI_LOCK(vp); 455 ufs_itimes_locked(vp); 456 if (ip->i_ump->um_fstype == UFS1) { 457 vap->va_atime.tv_sec = ip->i_din1->di_atime; 458 vap->va_atime.tv_nsec = ip->i_din1->di_atimensec; 459 } else { 460 vap->va_atime.tv_sec = ip->i_din2->di_atime; 461 vap->va_atime.tv_nsec = ip->i_din2->di_atimensec; 462 } 463 VI_UNLOCK(vp); 464 /* 465 * Copy from inode table 466 */ 467 vap->va_fsid = dev2udev(ip->i_dev); 468 vap->va_fileid = ip->i_number; 469 vap->va_mode = ip->i_mode & ~IFMT; 470 vap->va_nlink = ip->i_effnlink; 471 vap->va_uid = ip->i_uid; 472 vap->va_gid = ip->i_gid; 473 if (ip->i_ump->um_fstype == UFS1) { 474 vap->va_rdev = ip->i_din1->di_rdev; 475 vap->va_size = ip->i_din1->di_size; 476 vap->va_mtime.tv_sec = ip->i_din1->di_mtime; 477 vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec; 478 vap->va_ctime.tv_sec = ip->i_din1->di_ctime; 479 vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec; 480 vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks); 481 vap->va_filerev = ip->i_din1->di_modrev; 482 } else { 483 vap->va_rdev = ip->i_din2->di_rdev; 484 vap->va_size = ip->i_din2->di_size; 485 vap->va_mtime.tv_sec = ip->i_din2->di_mtime; 486 vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec; 487 vap->va_ctime.tv_sec = ip->i_din2->di_ctime; 488 
vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec; 489 vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime; 490 vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec; 491 vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks); 492 vap->va_filerev = ip->i_din2->di_modrev; 493 } 494 vap->va_flags = ip->i_flags; 495 vap->va_gen = ip->i_gen; 496 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 497 vap->va_type = IFTOVT(ip->i_mode); 498 return (0); 499} 500 501/* 502 * Set attribute vnode op. called from several syscalls 503 */ 504static int 505ufs_setattr(ap) 506 struct vop_setattr_args /* { 507 struct vnode *a_vp; 508 struct vattr *a_vap; 509 struct ucred *a_cred; 510 } */ *ap; 511{ 512 struct vattr *vap = ap->a_vap; 513 struct vnode *vp = ap->a_vp; 514 struct inode *ip = VTOI(vp); 515 struct ucred *cred = ap->a_cred; 516 struct thread *td = curthread; 517 int error; 518 519 /* 520 * Check for unsettable attributes. 521 */ 522 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 523 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 524 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || 525 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 526 return (EINVAL); 527 } 528 if (vap->va_flags != VNOVAL) { 529 if ((vap->va_flags & ~(UF_NODUMP | UF_IMMUTABLE | UF_APPEND | 530 UF_OPAQUE | UF_NOUNLINK | SF_ARCHIVED | SF_IMMUTABLE | 531 SF_APPEND | SF_NOUNLINK | SF_SNAPSHOT)) != 0) 532 return (EOPNOTSUPP); 533 if (vp->v_mount->mnt_flag & MNT_RDONLY) 534 return (EROFS); 535 /* 536 * Callers may only modify the file flags on objects they 537 * have VADMIN rights for. 538 */ 539 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 540 return (error); 541 /* 542 * Unprivileged processes are not permitted to unset system 543 * flags, or modify flags if any system flags are set. 544 * Privileged non-jail processes may not modify system flags 545 * if securelevel > 0 and any existing system flags are set. 
546 * Privileged jail processes behave like privileged non-jail 547 * processes if the security.jail.chflags_allowed sysctl is 548 * is non-zero; otherwise, they behave like unprivileged 549 * processes. 550 */ 551 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 552 if (ip->i_flags & 553 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { 554 error = securelevel_gt(cred, 0); 555 if (error) 556 return (error); 557 } 558 /* Snapshot flag cannot be set or cleared */ 559 if (((vap->va_flags & SF_SNAPSHOT) != 0 && 560 (ip->i_flags & SF_SNAPSHOT) == 0) || 561 ((vap->va_flags & SF_SNAPSHOT) == 0 && 562 (ip->i_flags & SF_SNAPSHOT) != 0)) 563 return (EPERM); 564 ip->i_flags = vap->va_flags; 565 DIP_SET(ip, i_flags, vap->va_flags); 566 } else { 567 if (ip->i_flags & 568 (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 569 (vap->va_flags & UF_SETTABLE) != vap->va_flags) 570 return (EPERM); 571 ip->i_flags &= SF_SETTABLE; 572 ip->i_flags |= (vap->va_flags & UF_SETTABLE); 573 DIP_SET(ip, i_flags, ip->i_flags); 574 } 575 ip->i_flag |= IN_CHANGE; 576 error = UFS_UPDATE(vp, 0); 577 if (ip->i_flags & (IMMUTABLE | APPEND)) 578 return (error); 579 } 580 /* 581 * If immutable or append, no one can change any of its attributes 582 * except the ones already handled (exec atime and, in some cases 583 * for the superuser, file flags including the immutability flags 584 * themselves). 585 */ 586 if (ip->i_flags & (IMMUTABLE | APPEND)) 587 return (EPERM); 588 /* 589 * Go through the fields and update iff not VNOVAL. 590 */ 591 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 592 if (vp->v_mount->mnt_flag & MNT_RDONLY) 593 return (EROFS); 594 if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, 595 td)) != 0) 596 return (error); 597 } 598 if (vap->va_size != VNOVAL) { 599 /* 600 * XXX most of the following special cases should be in 601 * callers instead of in N filesystems. The VDIR check 602 * mostly already is. 
603 */ 604 switch (vp->v_type) { 605 case VDIR: 606 return (EISDIR); 607 case VLNK: 608 case VREG: 609 /* 610 * Truncation should have an effect in these cases. 611 * Disallow it if the filesystem is read-only or 612 * the file is being snapshotted. 613 */ 614 if (vp->v_mount->mnt_flag & MNT_RDONLY) 615 return (EROFS); 616 if ((ip->i_flags & SF_SNAPSHOT) != 0) 617 return (EPERM); 618 break; 619 default: 620 /* 621 * According to POSIX, the result is unspecified 622 * for file types other than regular files, 623 * directories and shared memory objects. We 624 * don't support shared memory objects in the file 625 * system, and have dubious support for truncating 626 * symlinks. Just ignore the request in other cases. 627 */ 628 return (0); 629 } 630 if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL, 631 cred, td)) != 0) 632 return (error); 633 } 634 if (vap->va_atime.tv_sec != VNOVAL || 635 vap->va_mtime.tv_sec != VNOVAL || 636 vap->va_birthtime.tv_sec != VNOVAL) { 637 if (vp->v_mount->mnt_flag & MNT_RDONLY) 638 return (EROFS); 639 if ((ip->i_flags & SF_SNAPSHOT) != 0) 640 return (EPERM); 641 /* 642 * From utimes(2): 643 * If times is NULL, ... The caller must be the owner of 644 * the file, have permission to write the file, or be the 645 * super-user. 646 * If times is non-NULL, ... The caller must be the owner of 647 * the file or be the super-user. 648 * 649 * Possibly for historical reasons, try to use VADMIN in 650 * preference to VWRITE for a NULL timestamp. This means we 651 * will return EACCES in preference to EPERM if neither 652 * check succeeds. 653 */ 654 if (vap->va_vaflags & VA_UTIMES_NULL) { 655 /* 656 * NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes 657 * 658 * "A user having ACL_WRITE_DATA or ACL_WRITE_ATTRIBUTES 659 * will be allowed to set the times [..] to the current 660 * server time." 661 * 662 * XXX: Calling it four times seems a little excessive. 
663 */ 664 error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td); 665 if (error) 666 error = VOP_ACCESS(vp, VWRITE, cred, td); 667 } else 668 error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td); 669 if (error) 670 return (error); 671 if (vap->va_atime.tv_sec != VNOVAL) 672 ip->i_flag |= IN_ACCESS; 673 if (vap->va_mtime.tv_sec != VNOVAL) 674 ip->i_flag |= IN_CHANGE | IN_UPDATE; 675 if (vap->va_birthtime.tv_sec != VNOVAL && 676 ip->i_ump->um_fstype == UFS2) 677 ip->i_flag |= IN_MODIFIED; 678 ufs_itimes(vp); 679 if (vap->va_atime.tv_sec != VNOVAL) { 680 DIP_SET(ip, i_atime, vap->va_atime.tv_sec); 681 DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec); 682 } 683 if (vap->va_mtime.tv_sec != VNOVAL) { 684 DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec); 685 DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec); 686 } 687 if (vap->va_birthtime.tv_sec != VNOVAL && 688 ip->i_ump->um_fstype == UFS2) { 689 ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec; 690 ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec; 691 } 692 error = UFS_UPDATE(vp, 0); 693 if (error) 694 return (error); 695 } 696 error = 0; 697 if (vap->va_mode != (mode_t)VNOVAL) { 698 if (vp->v_mount->mnt_flag & MNT_RDONLY) 699 return (EROFS); 700 if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode & 701 (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH))) 702 return (EPERM); 703 error = ufs_chmod(vp, (int)vap->va_mode, cred, td); 704 } 705 return (error); 706} 707 708#ifdef UFS_ACL 709static int 710ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode, 711 int file_owner_id, struct ucred *cred, struct thread *td) 712{ 713 int error; 714 struct acl *aclp; 715 716 aclp = acl_alloc(M_WAITOK); 717 error = ufs_getacl_nfs4_internal(vp, aclp, td); 718 /* 719 * We don't have to handle EOPNOTSUPP here, as the filesystem claims 720 * it supports ACLs. 
721 */ 722 if (error) 723 goto out; 724 725 acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id); 726 error = ufs_setacl_nfs4_internal(vp, aclp, td); 727 728out: 729 acl_free(aclp); 730 return (error); 731} 732#endif /* UFS_ACL */ 733 734/* 735 * Mark this file's access time for update for vfs_mark_atime(). This 736 * is called from execve() and mmap(). 737 */ 738static int 739ufs_markatime(ap) 740 struct vop_markatime_args /* { 741 struct vnode *a_vp; 742 } */ *ap; 743{ 744 struct vnode *vp = ap->a_vp; 745 struct inode *ip = VTOI(vp); 746 747 VI_LOCK(vp); 748 ip->i_flag |= IN_ACCESS; 749 VI_UNLOCK(vp); 750 /* 751 * XXXKIB No UFS_UPDATE(ap->a_vp, 0) there. 752 */ 753 return (0); 754} 755 756/* 757 * Change the mode on a file. 758 * Inode must be locked before calling. 759 */ 760static int 761ufs_chmod(vp, mode, cred, td) 762 struct vnode *vp; 763 int mode; 764 struct ucred *cred; 765 struct thread *td; 766{ 767 struct inode *ip = VTOI(vp); 768 int error; 769 770 /* 771 * To modify the permissions on a file, must possess VADMIN 772 * for that file. 773 */ 774 if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td))) 775 return (error); 776 /* 777 * Privileged processes may set the sticky bit on non-directories, 778 * as well as set the setgid bit on a file with a group that the 779 * process is not a member of. Both of these are allowed in 780 * jail(8). 781 */ 782 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 783 if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0)) 784 return (EFTYPE); 785 } 786 if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { 787 error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); 788 if (error) 789 return (error); 790 } 791 792 /* 793 * Deny setting setuid if we are not the file owner. 
794 */ 795 if ((mode & ISUID) && ip->i_uid != cred->cr_uid) { 796 error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0); 797 if (error) 798 return (error); 799 } 800 801 ip->i_mode &= ~ALLPERMS; 802 ip->i_mode |= (mode & ALLPERMS); 803 DIP_SET(ip, i_mode, ip->i_mode); 804 ip->i_flag |= IN_CHANGE; 805#ifdef UFS_ACL 806 if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0) 807 error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td); 808#endif 809 if (error == 0 && (ip->i_flag & IN_CHANGE) != 0) 810 error = UFS_UPDATE(vp, 0); 811 812 return (error); 813} 814 815/* 816 * Perform chown operation on inode ip; 817 * inode must be locked prior to call. 818 */ 819static int 820ufs_chown(vp, uid, gid, cred, td) 821 struct vnode *vp; 822 uid_t uid; 823 gid_t gid; 824 struct ucred *cred; 825 struct thread *td; 826{ 827 struct inode *ip = VTOI(vp); 828 uid_t ouid; 829 gid_t ogid; 830 int error = 0; 831#ifdef QUOTA 832 int i; 833 ufs2_daddr_t change; 834#endif 835 836 if (uid == (uid_t)VNOVAL) 837 uid = ip->i_uid; 838 if (gid == (gid_t)VNOVAL) 839 gid = ip->i_gid; 840 /* 841 * To modify the ownership of a file, must possess VADMIN for that 842 * file. 843 */ 844 if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td))) 845 return (error); 846 /* 847 * To change the owner of a file, or change the group of a file to a 848 * group of which we are not a member, the caller must have 849 * privilege. 
850 */ 851 if (((uid != ip->i_uid && uid != cred->cr_uid) || 852 (gid != ip->i_gid && !groupmember(gid, cred))) && 853 (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0))) 854 return (error); 855 ogid = ip->i_gid; 856 ouid = ip->i_uid; 857#ifdef QUOTA 858 if ((error = getinoquota(ip)) != 0) 859 return (error); 860 if (ouid == uid) { 861 dqrele(vp, ip->i_dquot[USRQUOTA]); 862 ip->i_dquot[USRQUOTA] = NODQUOT; 863 } 864 if (ogid == gid) { 865 dqrele(vp, ip->i_dquot[GRPQUOTA]); 866 ip->i_dquot[GRPQUOTA] = NODQUOT; 867 } 868 change = DIP(ip, i_blocks); 869 (void) chkdq(ip, -change, cred, CHOWN); 870 (void) chkiq(ip, -1, cred, CHOWN); 871 for (i = 0; i < MAXQUOTAS; i++) { 872 dqrele(vp, ip->i_dquot[i]); 873 ip->i_dquot[i] = NODQUOT; 874 } 875#endif 876 ip->i_gid = gid; 877 DIP_SET(ip, i_gid, gid); 878 ip->i_uid = uid; 879 DIP_SET(ip, i_uid, uid); 880#ifdef QUOTA 881 if ((error = getinoquota(ip)) == 0) { 882 if (ouid == uid) { 883 dqrele(vp, ip->i_dquot[USRQUOTA]); 884 ip->i_dquot[USRQUOTA] = NODQUOT; 885 } 886 if (ogid == gid) { 887 dqrele(vp, ip->i_dquot[GRPQUOTA]); 888 ip->i_dquot[GRPQUOTA] = NODQUOT; 889 } 890 if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { 891 if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) 892 goto good; 893 else 894 (void) chkdq(ip, -change, cred, CHOWN|FORCE); 895 } 896 for (i = 0; i < MAXQUOTAS; i++) { 897 dqrele(vp, ip->i_dquot[i]); 898 ip->i_dquot[i] = NODQUOT; 899 } 900 } 901 ip->i_gid = ogid; 902 DIP_SET(ip, i_gid, ogid); 903 ip->i_uid = ouid; 904 DIP_SET(ip, i_uid, ouid); 905 if (getinoquota(ip) == 0) { 906 if (ouid == uid) { 907 dqrele(vp, ip->i_dquot[USRQUOTA]); 908 ip->i_dquot[USRQUOTA] = NODQUOT; 909 } 910 if (ogid == gid) { 911 dqrele(vp, ip->i_dquot[GRPQUOTA]); 912 ip->i_dquot[GRPQUOTA] = NODQUOT; 913 } 914 (void) chkdq(ip, change, cred, FORCE|CHOWN); 915 (void) chkiq(ip, 1, cred, FORCE|CHOWN); 916 (void) getinoquota(ip); 917 } 918 return (error); 919good: 920 if (getinoquota(ip)) 921 panic("ufs_chown: lost quota"); 922#endif /* 
QUOTA */ 923 ip->i_flag |= IN_CHANGE; 924 if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { 925 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) { 926 ip->i_mode &= ~(ISUID | ISGID); 927 DIP_SET(ip, i_mode, ip->i_mode); 928 } 929 } 930 error = UFS_UPDATE(vp, 0); 931 return (error); 932} 933 934static int 935ufs_remove(ap) 936 struct vop_remove_args /* { 937 struct vnode *a_dvp; 938 struct vnode *a_vp; 939 struct componentname *a_cnp; 940 } */ *ap; 941{ 942 struct inode *ip; 943 struct vnode *vp = ap->a_vp; 944 struct vnode *dvp = ap->a_dvp; 945 int error; 946 struct thread *td; 947 948 td = curthread; 949 ip = VTOI(vp); 950 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 951 (VTOI(dvp)->i_flags & APPEND)) { 952 error = EPERM; 953 goto out; 954 } 955#ifdef UFS_GJOURNAL 956 ufs_gjournal_orphan(vp); 957#endif 958 error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); 959 if (ip->i_nlink <= 0) 960 vp->v_vflag |= VV_NOSYNC; 961 if ((ip->i_flags & SF_SNAPSHOT) != 0) { 962 /* 963 * Avoid deadlock where another thread is trying to 964 * update the inodeblock for dvp and is waiting on 965 * snaplk. Temporary unlock the vnode lock for the 966 * unlinked file and sync the directory. This should 967 * allow vput() of the directory to not block later on 968 * while holding the snapshot vnode locked, assuming 969 * that the directory hasn't been unlinked too. 
970 */ 971 VOP_UNLOCK(vp, 0); 972 (void) VOP_FSYNC(dvp, MNT_WAIT, td); 973 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 974 } 975out: 976 return (error); 977} 978 979/* 980 * link vnode call 981 */ 982static int 983ufs_link(ap) 984 struct vop_link_args /* { 985 struct vnode *a_tdvp; 986 struct vnode *a_vp; 987 struct componentname *a_cnp; 988 } */ *ap; 989{ 990 struct vnode *vp = ap->a_vp; 991 struct vnode *tdvp = ap->a_tdvp; 992 struct componentname *cnp = ap->a_cnp; 993 struct inode *ip; 994 struct direct newdir; 995 int error; 996 997#ifdef INVARIANTS 998 if ((cnp->cn_flags & HASBUF) == 0) 999 panic("ufs_link: no name"); 1000#endif 1001 if (tdvp->v_mount != vp->v_mount) { 1002 error = EXDEV; 1003 goto out; 1004 } 1005 if (VTOI(tdvp)->i_effnlink < 2) 1006 panic("ufs_link: Bad link count %d on parent", 1007 VTOI(tdvp)->i_effnlink); 1008 ip = VTOI(vp); 1009 if ((nlink_t)ip->i_nlink >= LINK_MAX) { 1010 error = EMLINK; 1011 goto out; 1012 } 1013 /* 1014 * The file may have been removed after namei droped the original 1015 * lock. 
1016 */ 1017 if (ip->i_effnlink == 0) { 1018 error = ENOENT; 1019 goto out; 1020 } 1021 if (ip->i_flags & (IMMUTABLE | APPEND)) { 1022 error = EPERM; 1023 goto out; 1024 } 1025 ip->i_effnlink++; 1026 ip->i_nlink++; 1027 DIP_SET(ip, i_nlink, ip->i_nlink); 1028 ip->i_flag |= IN_CHANGE; 1029 if (DOINGSOFTDEP(vp)) 1030 softdep_setup_link(VTOI(tdvp), ip); 1031 error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp))); 1032 if (!error) { 1033 ufs_makedirentry(ip, cnp, &newdir); 1034 error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0); 1035 } 1036 1037 if (error) { 1038 ip->i_effnlink--; 1039 ip->i_nlink--; 1040 DIP_SET(ip, i_nlink, ip->i_nlink); 1041 ip->i_flag |= IN_CHANGE; 1042 if (DOINGSOFTDEP(vp)) 1043 softdep_revert_link(VTOI(tdvp), ip); 1044 } 1045out: 1046 return (error); 1047} 1048 1049/* 1050 * whiteout vnode call 1051 */ 1052static int 1053ufs_whiteout(ap) 1054 struct vop_whiteout_args /* { 1055 struct vnode *a_dvp; 1056 struct componentname *a_cnp; 1057 int a_flags; 1058 } */ *ap; 1059{ 1060 struct vnode *dvp = ap->a_dvp; 1061 struct componentname *cnp = ap->a_cnp; 1062 struct direct newdir; 1063 int error = 0; 1064 1065 switch (ap->a_flags) { 1066 case LOOKUP: 1067 /* 4.4 format directories support whiteout operations */ 1068 if (dvp->v_mount->mnt_maxsymlinklen > 0) 1069 return (0); 1070 return (EOPNOTSUPP); 1071 1072 case CREATE: 1073 /* create a new directory whiteout */ 1074#ifdef INVARIANTS 1075 if ((cnp->cn_flags & SAVENAME) == 0) 1076 panic("ufs_whiteout: missing name"); 1077 if (dvp->v_mount->mnt_maxsymlinklen <= 0) 1078 panic("ufs_whiteout: old format filesystem"); 1079#endif 1080 1081 newdir.d_ino = WINO; 1082 newdir.d_namlen = cnp->cn_namelen; 1083 bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); 1084 newdir.d_type = DT_WHT; 1085 error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0); 1086 break; 1087 1088 case DELETE: 1089 /* remove an existing directory whiteout */ 1090#ifdef INVARIANTS 1091 if 
(dvp->v_mount->mnt_maxsymlinklen <= 0) 1092 panic("ufs_whiteout: old format filesystem"); 1093#endif 1094 1095 cnp->cn_flags &= ~DOWHITEOUT; 1096 error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); 1097 break; 1098 default: 1099 panic("ufs_whiteout: unknown op"); 1100 } 1101 return (error); 1102} 1103 1104static volatile int rename_restarts; 1105SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD, 1106 __DEVOLATILE(int *, &rename_restarts), 0, 1107 "Times rename had to restart due to lock contention"); 1108 1109/* 1110 * Rename system call. 1111 * rename("foo", "bar"); 1112 * is essentially 1113 * unlink("bar"); 1114 * link("foo", "bar"); 1115 * unlink("foo"); 1116 * but ``atomically''. Can't do full commit without saving state in the 1117 * inode on disk which isn't feasible at this time. Best we can do is 1118 * always guarantee the target exists. 1119 * 1120 * Basic algorithm is: 1121 * 1122 * 1) Bump link count on source while we're linking it to the 1123 * target. This also ensure the inode won't be deleted out 1124 * from underneath us while we work (it may be truncated by 1125 * a concurrent `trunc' or `open' for creation). 1126 * 2) Link source to destination. If destination already exists, 1127 * delete it first. 1128 * 3) Unlink source reference to inode if still around. If a 1129 * directory was moved and the parent of the destination 1130 * is different from the source, patch the ".." entry in the 1131 * directory. 
1132 */ 1133static int 1134ufs_rename(ap) 1135 struct vop_rename_args /* { 1136 struct vnode *a_fdvp; 1137 struct vnode *a_fvp; 1138 struct componentname *a_fcnp; 1139 struct vnode *a_tdvp; 1140 struct vnode *a_tvp; 1141 struct componentname *a_tcnp; 1142 } */ *ap; 1143{ 1144 struct vnode *tvp = ap->a_tvp; 1145 struct vnode *tdvp = ap->a_tdvp; 1146 struct vnode *fvp = ap->a_fvp; 1147 struct vnode *fdvp = ap->a_fdvp; 1148 struct vnode *nvp; 1149 struct componentname *tcnp = ap->a_tcnp; 1150 struct componentname *fcnp = ap->a_fcnp; 1151 struct thread *td = fcnp->cn_thread; 1152 struct inode *fip, *tip, *tdp, *fdp; 1153 struct direct newdir; 1154 off_t endoff; 1155 int doingdirectory, newparent; 1156 int error = 0; 1157 struct mount *mp; 1158 ino_t ino; 1159 1160#ifdef INVARIANTS 1161 if ((tcnp->cn_flags & HASBUF) == 0 || 1162 (fcnp->cn_flags & HASBUF) == 0) 1163 panic("ufs_rename: no name"); 1164#endif 1165 endoff = 0; 1166 mp = tdvp->v_mount; 1167 VOP_UNLOCK(tdvp, 0); 1168 if (tvp && tvp != tdvp) 1169 VOP_UNLOCK(tvp, 0); 1170 /* 1171 * Check for cross-device rename. 1172 */ 1173 if ((fvp->v_mount != tdvp->v_mount) || 1174 (tvp && (fvp->v_mount != tvp->v_mount))) { 1175 error = EXDEV; 1176 mp = NULL; 1177 goto releout; 1178 } 1179 error = vfs_busy(mp, 0); 1180 if (error) { 1181 mp = NULL; 1182 goto releout; 1183 } 1184relock: 1185 /* 1186 * We need to acquire 2 to 4 locks depending on whether tvp is NULL 1187 * and fdvp and tdvp are the same directory. Subsequently we need 1188 * to double-check all paths and in the directory rename case we 1189 * need to verify that we are not creating a directory loop. To 1190 * handle this we acquire all but fdvp using non-blocking 1191 * acquisitions. If we fail to acquire any lock in the path we will 1192 * drop all held locks, acquire the new lock in a blocking fashion, 1193 * and then release it and restart the rename. This acquire/release 1194 * step ensures that we do not spin on a lock waiting for release. 
1195 */ 1196 error = vn_lock(fdvp, LK_EXCLUSIVE); 1197 if (error) 1198 goto releout; 1199 if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { 1200 VOP_UNLOCK(fdvp, 0); 1201 error = vn_lock(tdvp, LK_EXCLUSIVE); 1202 if (error) 1203 goto releout; 1204 VOP_UNLOCK(tdvp, 0); 1205 atomic_add_int(&rename_restarts, 1); 1206 goto relock; 1207 } 1208 /* 1209 * Re-resolve fvp to be certain it still exists and fetch the 1210 * correct vnode. 1211 */ 1212 error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); 1213 if (error) { 1214 VOP_UNLOCK(fdvp, 0); 1215 VOP_UNLOCK(tdvp, 0); 1216 goto releout; 1217 } 1218 error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); 1219 if (error) { 1220 VOP_UNLOCK(fdvp, 0); 1221 VOP_UNLOCK(tdvp, 0); 1222 if (error != EBUSY) 1223 goto releout; 1224 error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); 1225 if (error != 0) 1226 goto releout; 1227 VOP_UNLOCK(nvp, 0); 1228 vrele(fvp); 1229 fvp = nvp; 1230 atomic_add_int(&rename_restarts, 1); 1231 goto relock; 1232 } 1233 vrele(fvp); 1234 fvp = nvp; 1235 /* 1236 * Re-resolve tvp and acquire the vnode lock if present. 1237 */ 1238 error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino); 1239 if (error != 0 && error != EJUSTRETURN) { 1240 VOP_UNLOCK(fdvp, 0); 1241 VOP_UNLOCK(tdvp, 0); 1242 VOP_UNLOCK(fvp, 0); 1243 goto releout; 1244 } 1245 /* 1246 * If tvp disappeared we just carry on. 1247 */ 1248 if (error == EJUSTRETURN && tvp != NULL) { 1249 vrele(tvp); 1250 tvp = NULL; 1251 } 1252 /* 1253 * Get the tvp ino if the lookup succeeded. We may have to restart 1254 * if the non-blocking acquire fails. 
1255 */ 1256 if (error == 0) { 1257 nvp = NULL; 1258 error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); 1259 if (tvp) 1260 vrele(tvp); 1261 tvp = nvp; 1262 if (error) { 1263 VOP_UNLOCK(fdvp, 0); 1264 VOP_UNLOCK(tdvp, 0); 1265 VOP_UNLOCK(fvp, 0); 1266 if (error != EBUSY) 1267 goto releout; 1268 error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); 1269 if (error != 0) 1270 goto releout; 1271 VOP_UNLOCK(nvp, 0); 1272 atomic_add_int(&rename_restarts, 1); 1273 goto relock; 1274 } 1275 } 1276 fdp = VTOI(fdvp); 1277 fip = VTOI(fvp); 1278 tdp = VTOI(tdvp); 1279 tip = NULL; 1280 if (tvp) 1281 tip = VTOI(tvp); 1282 if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 1283 (VTOI(tdvp)->i_flags & APPEND))) { 1284 error = EPERM; 1285 goto unlockout; 1286 } 1287 /* 1288 * Renaming a file to itself has no effect. The upper layers should 1289 * not call us in that case. However, things could change after 1290 * we drop the locks above. 1291 */ 1292 if (fvp == tvp) { 1293 error = 0; 1294 goto unlockout; 1295 } 1296 doingdirectory = 0; 1297 newparent = 0; 1298 ino = fip->i_number; 1299 if (fip->i_nlink >= LINK_MAX) { 1300 error = EMLINK; 1301 goto unlockout; 1302 } 1303 if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) 1304 || (fdp->i_flags & APPEND)) { 1305 error = EPERM; 1306 goto unlockout; 1307 } 1308 if ((fip->i_mode & IFMT) == IFDIR) { 1309 /* 1310 * Avoid ".", "..", and aliases of "." for obvious reasons. 1311 */ 1312 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || 1313 fdp == fip || 1314 (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { 1315 error = EINVAL; 1316 goto unlockout; 1317 } 1318 if (fdp->i_number != tdp->i_number) 1319 newparent = tdp->i_number; 1320 doingdirectory = 1; 1321 } 1322 if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) || 1323 (tvp != NULL && tvp->v_type == VDIR && 1324 tvp->v_mountedhere != NULL)) { 1325 error = EXDEV; 1326 goto unlockout; 1327 } 1328 1329 /* 1330 * If ".." 
must be changed (ie the directory gets a new 1331 * parent) then the source directory must not be in the 1332 * directory hierarchy above the target, as this would 1333 * orphan everything below the source directory. Also 1334 * the user must have write permission in the source so 1335 * as to be able to change "..". 1336 */ 1337 if (doingdirectory && newparent) { 1338 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); 1339 if (error) 1340 goto unlockout; 1341 error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, 1342 &ino); 1343 /* 1344 * We encountered a lock that we have to wait for. Unlock 1345 * everything else and VGET before restarting. 1346 */ 1347 if (ino) { 1348 VOP_UNLOCK(fdvp, 0); 1349 VOP_UNLOCK(fvp, 0); 1350 VOP_UNLOCK(tdvp, 0); 1351 if (tvp) 1352 VOP_UNLOCK(tvp, 0); 1353 error = VFS_VGET(mp, ino, LK_SHARED, &nvp); 1354 if (error == 0) 1355 vput(nvp); 1356 atomic_add_int(&rename_restarts, 1); 1357 goto relock; 1358 } 1359 if (error) 1360 goto unlockout; 1361 if ((tcnp->cn_flags & SAVESTART) == 0) 1362 panic("ufs_rename: lost to startdir"); 1363 } 1364 if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 || 1365 tdp->i_effnlink == 0) 1366 panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp); 1367 1368 /* 1369 * 1) Bump link count while we're moving stuff 1370 * around. If we crash somewhere before 1371 * completing our work, the link count 1372 * may be wrong, but correctable. 1373 */ 1374 fip->i_effnlink++; 1375 fip->i_nlink++; 1376 DIP_SET(fip, i_nlink, fip->i_nlink); 1377 fip->i_flag |= IN_CHANGE; 1378 if (DOINGSOFTDEP(fvp)) 1379 softdep_setup_link(tdp, fip); 1380 error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | DOINGASYNC(fvp))); 1381 if (error) 1382 goto bad; 1383 1384 /* 1385 * 2) If target doesn't exist, link the target 1386 * to the source and unlink the source. 1387 * Otherwise, rewrite the target directory 1388 * entry to reference the source inode and 1389 * expunge the original entry's existence. 
1390 */ 1391 if (tip == NULL) { 1392 if (tdp->i_dev != fip->i_dev) 1393 panic("ufs_rename: EXDEV"); 1394 if (doingdirectory && newparent) { 1395 /* 1396 * Account for ".." in new directory. 1397 * When source and destination have the same 1398 * parent we don't adjust the link count. The 1399 * actual link modification is completed when 1400 * .. is rewritten below. 1401 */ 1402 if ((nlink_t)tdp->i_nlink >= LINK_MAX) { 1403 error = EMLINK; 1404 goto bad; 1405 } 1406 } 1407 ufs_makedirentry(fip, tcnp, &newdir); 1408 error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1); 1409 if (error) 1410 goto bad; 1411 /* Setup tdvp for directory compaction if needed. */ 1412 if (tdp->i_count && tdp->i_endoff && 1413 tdp->i_endoff < tdp->i_size) 1414 endoff = tdp->i_endoff; 1415 } else { 1416 if (tip->i_dev != tdp->i_dev || tip->i_dev != fip->i_dev) 1417 panic("ufs_rename: EXDEV"); 1418 /* 1419 * Short circuit rename(foo, foo). 1420 */ 1421 if (tip->i_number == fip->i_number) 1422 panic("ufs_rename: same file"); 1423 /* 1424 * If the parent directory is "sticky", then the caller 1425 * must possess VADMIN for the parent directory, or the 1426 * destination of the rename. This implements append-only 1427 * directories. 1428 */ 1429 if ((tdp->i_mode & S_ISTXT) && 1430 VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) && 1431 VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) { 1432 error = EPERM; 1433 goto bad; 1434 } 1435 /* 1436 * Target must be empty if a directory and have no links 1437 * to it. Also, ensure source and target are compatible 1438 * (both directories, or both not directories). 
1439 */ 1440 if ((tip->i_mode & IFMT) == IFDIR) { 1441 if ((tip->i_effnlink > 2) || 1442 !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) { 1443 error = ENOTEMPTY; 1444 goto bad; 1445 } 1446 if (!doingdirectory) { 1447 error = ENOTDIR; 1448 goto bad; 1449 } 1450 cache_purge(tdvp); 1451 } else if (doingdirectory) { 1452 error = EISDIR; 1453 goto bad; 1454 } 1455 if (doingdirectory) { 1456 if (!newparent) { 1457 tdp->i_effnlink--; 1458 if (DOINGSOFTDEP(tdvp)) 1459 softdep_change_linkcnt(tdp); 1460 } 1461 tip->i_effnlink--; 1462 if (DOINGSOFTDEP(tvp)) 1463 softdep_change_linkcnt(tip); 1464 } 1465 error = ufs_dirrewrite(tdp, tip, fip->i_number, 1466 IFTODT(fip->i_mode), 1467 (doingdirectory && newparent) ? newparent : doingdirectory); 1468 if (error) { 1469 if (doingdirectory) { 1470 if (!newparent) { 1471 tdp->i_effnlink++; 1472 if (DOINGSOFTDEP(tdvp)) 1473 softdep_change_linkcnt(tdp); 1474 } 1475 tip->i_effnlink++; 1476 if (DOINGSOFTDEP(tvp)) 1477 softdep_change_linkcnt(tip); 1478 } 1479 } 1480 if (doingdirectory && !DOINGSOFTDEP(tvp)) { 1481 /* 1482 * The only stuff left in the directory is "." 1483 * and "..". The "." reference is inconsequential 1484 * since we are quashing it. We have removed the "." 1485 * reference and the reference in the parent directory, 1486 * but there may be other hard links. The soft 1487 * dependency code will arrange to do these operations 1488 * after the parent directory entry has been deleted on 1489 * disk, so when running with that code we avoid doing 1490 * them now. 1491 */ 1492 if (!newparent) { 1493 tdp->i_nlink--; 1494 DIP_SET(tdp, i_nlink, tdp->i_nlink); 1495 tdp->i_flag |= IN_CHANGE; 1496 } 1497 tip->i_nlink--; 1498 DIP_SET(tip, i_nlink, tip->i_nlink); 1499 tip->i_flag |= IN_CHANGE; 1500 } 1501 } 1502 1503 /* 1504 * 3) Unlink the source. We have to resolve the path again to 1505 * fixup the directory offset and count for ufs_dirremove. 
1506 */ 1507 if (fdvp == tdvp) { 1508 error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); 1509 if (error) 1510 panic("ufs_rename: from entry went away!"); 1511 if (ino != fip->i_number) 1512 panic("ufs_rename: ino mismatch %d != %d\n", ino, 1513 fip->i_number); 1514 } 1515 /* 1516 * If the source is a directory with a 1517 * new parent, the link count of the old 1518 * parent directory must be decremented 1519 * and ".." set to point to the new parent. 1520 */ 1521 if (doingdirectory && newparent) { 1522 /* 1523 * If tip exists we simply use its link, otherwise we must 1524 * add a new one. 1525 */ 1526 if (tip == NULL) { 1527 tdp->i_effnlink++; 1528 tdp->i_nlink++; 1529 DIP_SET(tdp, i_nlink, tdp->i_nlink); 1530 tdp->i_flag |= IN_CHANGE; 1531 if (DOINGSOFTDEP(tdvp)) 1532 softdep_setup_dotdot_link(tdp, fip); 1533 error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) | 1534 DOINGASYNC(tdvp))); 1535 /* Don't go to bad here as the new link exists. */ 1536 if (error) 1537 goto unlockout; 1538 } else if (DOINGSUJ(tdvp)) 1539 /* Journal must account for each new link. */ 1540 softdep_setup_dotdot_link(tdp, fip); 1541 fip->i_offset = mastertemplate.dot_reclen; 1542 ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0); 1543 cache_purge(fdvp); 1544 } 1545 error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0); 1546 /* 1547 * The kern_renameat() looks up the fvp using the DELETE flag, which 1548 * causes the removal of the name cache entry for fvp. 1549 * As the relookup of the fvp is done in two steps: 1550 * ufs_lookup_ino() and then VFS_VGET(), another thread might do a 1551 * normal lookup of the from name just before the VFS_VGET() call, 1552 * causing the cache entry to be re-instantiated. 1553 * 1554 * The same issue also applies to tvp if it exists as 1555 * otherwise we may have a stale name cache entry for the new 1556 * name that references the old i-node if it has other links 1557 * or open file descriptors. 
1558 */ 1559 cache_purge(fvp); 1560 if (tvp) 1561 cache_purge(tvp); 1562 1563unlockout: 1564 vput(fdvp); 1565 vput(fvp); 1566 if (tvp) 1567 vput(tvp); 1568 /* 1569 * If compaction or fsync was requested do it now that other locks 1570 * are no longer needed. 1571 */ 1572 if (error == 0 && endoff != 0) { 1573#ifdef UFS_DIRHASH 1574 if (tdp->i_dirhash != NULL) 1575 ufsdirhash_dirtrunc(tdp, endoff); 1576#endif 1577 UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | IO_SYNC, tcnp->cn_cred, 1578 td); 1579 } 1580 if (error == 0 && tdp->i_flag & IN_NEEDSYNC) 1581 error = VOP_FSYNC(tdvp, MNT_WAIT, td); 1582 vput(tdvp); 1583 if (mp) 1584 vfs_unbusy(mp); 1585 return (error); 1586 1587bad: 1588 fip->i_effnlink--; 1589 fip->i_nlink--; 1590 DIP_SET(fip, i_nlink, fip->i_nlink); 1591 fip->i_flag |= IN_CHANGE; 1592 if (DOINGSOFTDEP(fvp)) 1593 softdep_revert_link(tdp, fip); 1594 goto unlockout; 1595 1596releout: 1597 vrele(fdvp); 1598 vrele(fvp); 1599 vrele(tdvp); 1600 if (tvp) 1601 vrele(tvp); 1602 if (mp) 1603 vfs_unbusy(mp); 1604 1605 return (error); 1606} 1607 1608#ifdef UFS_ACL 1609static int 1610ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp, 1611 mode_t dmode, struct ucred *cred, struct thread *td) 1612{ 1613 int error; 1614 struct inode *ip = VTOI(tvp); 1615 struct acl *dacl, *acl; 1616 1617 acl = acl_alloc(M_WAITOK); 1618 dacl = acl_alloc(M_WAITOK); 1619 1620 /* 1621 * Retrieve default ACL from parent, if any. 1622 */ 1623 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); 1624 switch (error) { 1625 case 0: 1626 /* 1627 * Retrieved a default ACL, so merge mode and ACL if 1628 * necessary. If the ACL is empty, fall through to 1629 * the "not defined or available" case. 
1630 */ 1631 if (acl->acl_cnt != 0) { 1632 dmode = acl_posix1e_newfilemode(dmode, acl); 1633 ip->i_mode = dmode; 1634 DIP_SET(ip, i_mode, dmode); 1635 *dacl = *acl; 1636 ufs_sync_acl_from_inode(ip, acl); 1637 break; 1638 } 1639 /* FALLTHROUGH */ 1640 1641 case EOPNOTSUPP: 1642 /* 1643 * Just use the mode as-is. 1644 */ 1645 ip->i_mode = dmode; 1646 DIP_SET(ip, i_mode, dmode); 1647 error = 0; 1648 goto out; 1649 1650 default: 1651 goto out; 1652 } 1653 1654 /* 1655 * XXX: If we abort now, will Soft Updates notify the extattr 1656 * code that the EAs for the file need to be released? 1657 */ 1658 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); 1659 if (error == 0) 1660 error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td); 1661 switch (error) { 1662 case 0: 1663 break; 1664 1665 case EOPNOTSUPP: 1666 /* 1667 * XXX: This should not happen, as EOPNOTSUPP above 1668 * was supposed to free acl. 1669 */ 1670 printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); 1671 /* 1672 panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()"); 1673 */ 1674 break; 1675 1676 default: 1677 goto out; 1678 } 1679 1680out: 1681 acl_free(acl); 1682 acl_free(dacl); 1683 1684 return (error); 1685} 1686 1687static int 1688ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp, 1689 mode_t mode, struct ucred *cred, struct thread *td) 1690{ 1691 int error; 1692 struct inode *ip = VTOI(tvp); 1693 struct acl *acl; 1694 1695 acl = acl_alloc(M_WAITOK); 1696 1697 /* 1698 * Retrieve default ACL for parent, if any. 1699 */ 1700 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); 1701 switch (error) { 1702 case 0: 1703 /* 1704 * Retrieved a default ACL, so merge mode and ACL if 1705 * necessary. 1706 */ 1707 if (acl->acl_cnt != 0) { 1708 /* 1709 * Two possible ways for default ACL to not 1710 * be present. First, the EA can be 1711 * undefined, or second, the default ACL can 1712 * be blank. If it's blank, fall through to 1713 * the it's not defined case. 
1714 */ 1715 mode = acl_posix1e_newfilemode(mode, acl); 1716 ip->i_mode = mode; 1717 DIP_SET(ip, i_mode, mode); 1718 ufs_sync_acl_from_inode(ip, acl); 1719 break; 1720 } 1721 /* FALLTHROUGH */ 1722 1723 case EOPNOTSUPP: 1724 /* 1725 * Just use the mode as-is. 1726 */ 1727 ip->i_mode = mode; 1728 DIP_SET(ip, i_mode, mode); 1729 error = 0; 1730 goto out; 1731 1732 default: 1733 goto out; 1734 } 1735 1736 /* 1737 * XXX: If we abort now, will Soft Updates notify the extattr 1738 * code that the EAs for the file need to be released? 1739 */ 1740 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); 1741 switch (error) { 1742 case 0: 1743 break; 1744 1745 case EOPNOTSUPP: 1746 /* 1747 * XXX: This should not happen, as EOPNOTSUPP above was 1748 * supposed to free acl. 1749 */ 1750 printf("ufs_makeinode: VOP_GETACL() but no " 1751 "VOP_SETACL()\n"); 1752 /* panic("ufs_makeinode: VOP_GETACL() but no " 1753 "VOP_SETACL()"); */ 1754 break; 1755 1756 default: 1757 goto out; 1758 } 1759 1760out: 1761 acl_free(acl); 1762 1763 return (error); 1764} 1765 1766static int 1767ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp, 1768 mode_t child_mode, struct ucred *cred, struct thread *td) 1769{ 1770 int error; 1771 struct acl *parent_aclp, *child_aclp; 1772 1773 parent_aclp = acl_alloc(M_WAITOK); 1774 child_aclp = acl_alloc(M_WAITOK | M_ZERO); 1775 1776 error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td); 1777 if (error) 1778 goto out; 1779 acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp, 1780 child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR); 1781 error = ufs_setacl_nfs4_internal(tvp, child_aclp, td); 1782 if (error) 1783 goto out; 1784out: 1785 acl_free(parent_aclp); 1786 acl_free(child_aclp); 1787 1788 return (error); 1789} 1790#endif 1791 1792/* 1793 * Mkdir system call 1794 */ 1795static int 1796ufs_mkdir(ap) 1797 struct vop_mkdir_args /* { 1798 struct vnode *a_dvp; 1799 struct vnode **a_vpp; 1800 struct componentname *a_cnp; 1801 struct vattr *a_vap; 
1802 } */ *ap; 1803{ 1804 struct vnode *dvp = ap->a_dvp; 1805 struct vattr *vap = ap->a_vap; 1806 struct componentname *cnp = ap->a_cnp; 1807 struct inode *ip, *dp; 1808 struct vnode *tvp; 1809 struct buf *bp; 1810 struct dirtemplate dirtemplate, *dtp; 1811 struct direct newdir; 1812 int error, dmode; 1813 long blkoff; 1814 1815#ifdef INVARIANTS 1816 if ((cnp->cn_flags & HASBUF) == 0) 1817 panic("ufs_mkdir: no name"); 1818#endif 1819 dp = VTOI(dvp); 1820 if ((nlink_t)dp->i_nlink >= LINK_MAX) { 1821 error = EMLINK; 1822 goto out; 1823 } 1824 dmode = vap->va_mode & 0777; 1825 dmode |= IFDIR; 1826 /* 1827 * Must simulate part of ufs_makeinode here to acquire the inode, 1828 * but not have it entered in the parent directory. The entry is 1829 * made later after writing "." and ".." entries. 1830 */ 1831 error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); 1832 if (error) 1833 goto out; 1834 ip = VTOI(tvp); 1835 ip->i_gid = dp->i_gid; 1836 DIP_SET(ip, i_gid, dp->i_gid); 1837#ifdef SUIDDIR 1838 { 1839#ifdef QUOTA 1840 struct ucred ucred, *ucp; 1841 gid_t ucred_group; 1842 ucp = cnp->cn_cred; 1843#endif 1844 /* 1845 * If we are hacking owners here, (only do this where told to) 1846 * and we are not giving it TO root, (would subvert quotas) 1847 * then go ahead and give it to the other user. 1848 * The new directory also inherits the SUID bit. 1849 * If user's UID and dir UID are the same, 1850 * 'give it away' so that the SUID is still forced on. 1851 */ 1852 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1853 (dp->i_mode & ISUID) && dp->i_uid) { 1854 dmode |= ISUID; 1855 ip->i_uid = dp->i_uid; 1856 DIP_SET(ip, i_uid, dp->i_uid); 1857#ifdef QUOTA 1858 if (dp->i_uid != cnp->cn_cred->cr_uid) { 1859 /* 1860 * Make sure the correct user gets charged 1861 * for the space. 1862 * Make a dummy credential for the victim. 1863 * XXX This seems to never be accessed out of 1864 * our context so a stack variable is ok. 
1865 */ 1866 refcount_init(&ucred.cr_ref, 1); 1867 ucred.cr_uid = ip->i_uid; 1868 ucred.cr_ngroups = 1; 1869 ucred.cr_groups = &ucred_group; 1870 ucred.cr_groups[0] = dp->i_gid; 1871 ucp = &ucred; 1872 } 1873#endif 1874 } else { 1875 ip->i_uid = cnp->cn_cred->cr_uid; 1876 DIP_SET(ip, i_uid, ip->i_uid); 1877 } 1878#ifdef QUOTA 1879 if ((error = getinoquota(ip)) || 1880 (error = chkiq(ip, 1, ucp, 0))) { 1881 if (DOINGSOFTDEP(tvp)) 1882 softdep_revert_link(dp, ip); 1883 UFS_VFREE(tvp, ip->i_number, dmode); 1884 vput(tvp); 1885 return (error); 1886 } 1887#endif 1888 } 1889#else /* !SUIDDIR */ 1890 ip->i_uid = cnp->cn_cred->cr_uid; 1891 DIP_SET(ip, i_uid, ip->i_uid); 1892#ifdef QUOTA 1893 if ((error = getinoquota(ip)) || 1894 (error = chkiq(ip, 1, cnp->cn_cred, 0))) { 1895 if (DOINGSOFTDEP(tvp)) 1896 softdep_revert_link(dp, ip); 1897 UFS_VFREE(tvp, ip->i_number, dmode); 1898 vput(tvp); 1899 return (error); 1900 } 1901#endif 1902#endif /* !SUIDDIR */ 1903 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1904 ip->i_mode = dmode; 1905 DIP_SET(ip, i_mode, dmode); 1906 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ 1907 ip->i_effnlink = 2; 1908 ip->i_nlink = 2; 1909 DIP_SET(ip, i_nlink, 2); 1910 1911 if (cnp->cn_flags & ISWHITEOUT) { 1912 ip->i_flags |= UF_OPAQUE; 1913 DIP_SET(ip, i_flags, ip->i_flags); 1914 } 1915 1916 /* 1917 * Bump link count in parent directory to reflect work done below. 1918 * Should be done before reference is created so cleanup is 1919 * possible if we crash. 
1920 */ 1921 dp->i_effnlink++; 1922 dp->i_nlink++; 1923 DIP_SET(dp, i_nlink, dp->i_nlink); 1924 dp->i_flag |= IN_CHANGE; 1925 if (DOINGSOFTDEP(dvp)) 1926 softdep_setup_mkdir(dp, ip); 1927 error = UFS_UPDATE(dvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp))); 1928 if (error) 1929 goto bad; 1930#ifdef MAC 1931 if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { 1932 error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, 1933 dvp, tvp, cnp); 1934 if (error) 1935 goto bad; 1936 } 1937#endif 1938#ifdef UFS_ACL 1939 if (dvp->v_mount->mnt_flag & MNT_ACLS) { 1940 error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode, 1941 cnp->cn_cred, cnp->cn_thread); 1942 if (error) 1943 goto bad; 1944 } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { 1945 error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode, 1946 cnp->cn_cred, cnp->cn_thread); 1947 if (error) 1948 goto bad; 1949 } 1950#endif /* !UFS_ACL */ 1951 1952 /* 1953 * Initialize directory with "." and ".." from static template. 1954 */ 1955 if (dvp->v_mount->mnt_maxsymlinklen > 0) 1956 dtp = &mastertemplate; 1957 else 1958 dtp = (struct dirtemplate *)&omastertemplate; 1959 dirtemplate = *dtp; 1960 dirtemplate.dot_ino = ip->i_number; 1961 dirtemplate.dotdot_ino = dp->i_number; 1962 if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, 1963 BA_CLRBUF, &bp)) != 0) 1964 goto bad; 1965 ip->i_size = DIRBLKSIZ; 1966 DIP_SET(ip, i_size, DIRBLKSIZ); 1967 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1968 vnode_pager_setsize(tvp, (u_long)ip->i_size); 1969 bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); 1970 if (DOINGSOFTDEP(tvp)) { 1971 /* 1972 * Ensure that the entire newly allocated block is a 1973 * valid directory so that future growth within the 1974 * block does not have to ensure that the block is 1975 * written before the inode. 
1976 */ 1977 blkoff = DIRBLKSIZ; 1978 while (blkoff < bp->b_bcount) { 1979 ((struct direct *) 1980 (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; 1981 blkoff += DIRBLKSIZ; 1982 } 1983 } 1984 if ((error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | 1985 DOINGASYNC(tvp)))) != 0) { 1986 (void)bwrite(bp); 1987 goto bad; 1988 } 1989 /* 1990 * Directory set up, now install its entry in the parent directory. 1991 * 1992 * If we are not doing soft dependencies, then we must write out the 1993 * buffer containing the new directory body before entering the new 1994 * name in the parent. If we are doing soft dependencies, then the 1995 * buffer containing the new directory body will be passed to and 1996 * released in the soft dependency code after the code has attached 1997 * an appropriate ordering dependency to the buffer which ensures that 1998 * the buffer is written before the new name is written in the parent. 1999 */ 2000 if (DOINGASYNC(dvp)) 2001 bdwrite(bp); 2002 else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp)))) 2003 goto bad; 2004 ufs_makedirentry(ip, cnp, &newdir); 2005 error = ufs_direnter(dvp, tvp, &newdir, cnp, bp, 0); 2006 2007bad: 2008 if (error == 0) { 2009 *ap->a_vpp = tvp; 2010 } else { 2011 dp->i_effnlink--; 2012 dp->i_nlink--; 2013 DIP_SET(dp, i_nlink, dp->i_nlink); 2014 dp->i_flag |= IN_CHANGE; 2015 /* 2016 * No need to do an explicit VOP_TRUNCATE here, vrele will 2017 * do this for us because we set the link count to 0. 2018 */ 2019 ip->i_effnlink = 0; 2020 ip->i_nlink = 0; 2021 DIP_SET(ip, i_nlink, 0); 2022 ip->i_flag |= IN_CHANGE; 2023 if (DOINGSOFTDEP(tvp)) 2024 softdep_revert_mkdir(dp, ip); 2025 2026 vput(tvp); 2027 } 2028out: 2029 return (error); 2030} 2031 2032/* 2033 * Rmdir system call. 
 *
 * Returns EINVAL when the directory's effective link count is below 2
 * or something is mounted on it, ENOTEMPTY when it still has entries,
 * EPERM when flags on the directory or its parent forbid removal, or
 * the error from removing the entry in the parent.
 */
static int
ufs_rmdir(ap)
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct inode *ip, *dp;
	int error;

	ip = VTOI(vp);
	dp = VTOI(dvp);

	/*
	 * Do not remove a directory that is in the process of being renamed.
	 * Verify the directory is empty (and valid). Rmdir ".." will not be
	 * valid since ".." will contain a reference to the current directory
	 * and thus be non-empty. Do not allow the removal of mounted on
	 * directories (this can happen when an NFS exported filesystem
	 * tries to remove a locally mounted on directory).
	 */
	error = 0;
	if (ip->i_effnlink < 2) {
		error = EINVAL;
		goto out;
	}
	if (dp->i_effnlink < 3)
		panic("ufs_dirrem: Bad link count %d on parent",
		    dp->i_effnlink);
	if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
		error = ENOTEMPTY;
		goto out;
	}
	if ((dp->i_flags & APPEND)
	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
		error = EPERM;
		goto out;
	}
	if (vp->v_mountedhere != 0) {
		error = EINVAL;
		goto out;
	}
#ifdef UFS_GJOURNAL
	ufs_gjournal_orphan(vp);
#endif
	/*
	 * Delete reference to directory before purging
	 * inode.  If we crash in between, the directory
	 * will be reattached to lost+found,
	 */
	dp->i_effnlink--;
	ip->i_effnlink--;
	if (DOINGSOFTDEP(vp))
		softdep_setup_rmdir(dp, ip);
	error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
	if (error) {
		/* Entry removal failed: restore the effective link counts. */
		dp->i_effnlink++;
		ip->i_effnlink++;
		if (DOINGSOFTDEP(vp))
			softdep_revert_rmdir(dp, ip);
		goto out;
	}
	cache_purge(dvp);
	/*
	 * The only stuff left in the directory is "." and "..". The "."
	 * reference is inconsequential since we are quashing it. The soft
	 * dependency code will arrange to do these operations after
	 * the parent directory entry has been deleted on disk, so
	 * when running with that code we avoid doing them now.
	 */
	if (!DOINGSOFTDEP(vp)) {
		dp->i_nlink--;
		DIP_SET(dp, i_nlink, dp->i_nlink);
		dp->i_flag |= IN_CHANGE;
		/* The result of this update becomes the return value. */
		error = UFS_UPDATE(dvp, 0);
		ip->i_nlink--;
		DIP_SET(ip, i_nlink, ip->i_nlink);
		ip->i_flag |= IN_CHANGE;
	}
	cache_purge(vp);
#ifdef UFS_DIRHASH
	/* Kill any active hash; i_effnlink == 0, so it will not come back. */
	if (ip->i_dirhash != NULL)
		ufsdirhash_free(ip);
#endif
out:
	return (error);
}

/*
 * symlink -- make a symbolic link
 *
 * Creates the inode with ufs_makeinode() and then stores the target.
 * A target shorter than the mount's mnt_maxsymlinklen is copied into
 * the inode through SHORTLINK(); anything longer is written as file
 * data with vn_rdwr().  If storing the target fails, the new vnode is
 * released with vput() and the error is returned.
 */
static int
ufs_symlink(ap)
	struct vop_symlink_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
		char *a_target;
	} */ *ap;
{
	struct vnode *vp, **vpp = ap->a_vpp;
	struct inode *ip;
	int len, error;

	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
	    vpp, ap->a_cnp);
	if (error)
		return (error);
	vp = *vpp;
	len = strlen(ap->a_target);
	if (len < vp->v_mount->mnt_maxsymlinklen) {
		ip = VTOI(vp);
		/* Only len bytes are copied; no terminator is stored here. */
		bcopy(ap->a_target, SHORTLINK(ip), len);
		ip->i_size = len;
		DIP_SET(ip, i_size, len);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		error = UFS_UPDATE(vp, 0);
	} else
		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
		    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
		    ap->a_cnp->cn_cred, NOCRED, NULL, NULL);
	if (error)
		vput(vp);
	return (error);
}

/*
 * Vnode op for reading directories.
 *
 * The routine below assumes that the on-disk format of a directory
 * is the same as that defined by <sys/dirent.h>.
 * If the on-disk
 * format changes, then it will be necessary to do a conversion
 * from the on-disk format that read returns to the format defined
 * by <sys/dirent.h>.
 *
 * The request is trimmed so that offset + length ends on a DIRBLKSIZ
 * boundary (EINVAL if nothing would remain); the trimmed byte count is
 * added back to uio_resid before returning.  A directory whose
 * effective link count is 0 reads as empty.  When a_ncookies is
 * non-NULL the starting offset is first rounded down to a DIRBLKSIZ
 * boundary, and an array holding the end offset of each returned entry
 * is allocated with malloc(M_TEMP) and handed back through a_cookies.
 */
int
ufs_readdir(ap)
	struct vop_readdir_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		struct ucred *a_cred;
		int *a_eofflag;
		int *a_ncookies;
		u_long **a_cookies;
	} */ *ap;
{
	struct uio *uio = ap->a_uio;
	struct inode *ip;
	int error;
	size_t count, lost;
	off_t off;

	if (ap->a_ncookies != NULL)
		/*
		 * Ensure that the block is aligned.  The caller can use
		 * the cookies to determine where in the block to start.
		 */
		uio->uio_offset &= ~(DIRBLKSIZ - 1);
	ip = VTOI(ap->a_vp);
	if (ip->i_effnlink == 0)
		return (0);
	off = uio->uio_offset;
	count = uio->uio_resid;
	/* Make sure we don't return partial entries. */
	if (count <= ((uio->uio_offset + count) & (DIRBLKSIZ -1)))
		return (EINVAL);
	count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
	/* Remember how much was trimmed so it can be restored on exit. */
	lost = uio->uio_resid - count;
	uio->uio_resid = count;
	uio->uio_iov->iov_len = count;
#	if (BYTE_ORDER == LITTLE_ENDIAN)
		if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) {
			error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
		} else {
			/*
			 * Old-format directory: read into a kernel buffer,
			 * swap d_namlen and d_type in every entry, then
			 * copy the result out to the caller's uio.
			 */
			struct dirent *dp, *edp;
			struct uio auio;
			struct iovec aiov;
			caddr_t dirbuf;
			int readcnt;
			u_char tmp;

			auio = *uio;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			aiov.iov_len = count;
			dirbuf = malloc(count, M_TEMP, M_WAITOK);
			aiov.iov_base = dirbuf;
			error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
			if (error == 0) {
				readcnt = count - auio.uio_resid;
				edp = (struct dirent *)&dirbuf[readcnt];
				for (dp = (struct dirent *)dirbuf; dp < edp; ) {
					tmp = dp->d_namlen;
					dp->d_namlen = dp->d_type;
					dp->d_type = tmp;
					if (dp->d_reclen > 0) {
						dp = (struct dirent *)
						    ((char *)dp + dp->d_reclen);
					} else {
						/* Zero-length record: stop. */
						error = EIO;
						break;
					}
				}
				/* Copy out only if the walk ran to the end. */
				if (dp >= edp)
					error = uiomove(dirbuf, readcnt, uio);
			}
			free(dirbuf, M_TEMP);
		}
#	else
		error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
#	endif
	if (!error && ap->a_ncookies != NULL) {
		struct dirent* dpStart;
		struct dirent* dpEnd;
		struct dirent* dp;
		int ncookies;
		u_long *cookies;
		u_long *cookiep;

		if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
			panic("ufs_readdir: unexpected uio from NFS server");
		/*
		 * iov_base has advanced past the data just read; step back
		 * by the number of bytes transferred to find its start.
		 */
		dpStart = (struct dirent *)
		    ((char *)uio->uio_iov->iov_base - (uio->uio_offset - off));
		dpEnd = (struct dirent *) uio->uio_iov->iov_base;
		/* First pass counts the entries, second pass fills cookies. */
		for (dp = dpStart, ncookies = 0;
		     dp < dpEnd;
		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen))
			ncookies++;
		cookies = malloc(ncookies * sizeof(u_long), M_TEMP,
		    M_WAITOK);
		for (dp = dpStart, cookiep = cookies;
		     dp < dpEnd;
		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) {
			off += dp->d_reclen;
			*cookiep++ = (u_long) off;
		}
		*ap->a_ncookies = ncookies;
		*ap->a_cookies = cookies;
	}
	uio->uio_resid += lost;
	if (ap->a_eofflag)
	    *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
	return (error);
}

/*
 * Return target name of a symbolic link
 *
 * A link whose size is below the mount's mnt_maxsymlinklen, or whose
 * inode has no blocks allocated, is copied out directly from
 * SHORTLINK(); otherwise the target is read as file data with
 * VOP_READ().
 */
static int
ufs_readlink(ap)
	struct vop_readlink_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	doff_t isize;

	isize = ip->i_size;
	if ((isize < vp->v_mount->mnt_maxsymlinklen) ||
	    DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */
		return (uiomove(SHORTLINK(ip), isize, ap->a_uio));
	}
	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
}

2310/* 2311 * Calculate the logical to physical mapping if not done already, 2312 * then call the device strategy routine. 2313 * 2314 * In order to be able to swap to a file, the ufs_bmaparray() operation may not 2315 * deadlock on memory. See ufs_bmap() for details. 2316 */ 2317static int 2318ufs_strategy(ap) 2319 struct vop_strategy_args /* { 2320 struct vnode *a_vp; 2321 struct buf *a_bp; 2322 } */ *ap; 2323{ 2324 struct buf *bp = ap->a_bp; 2325 struct vnode *vp = ap->a_vp; 2326 struct bufobj *bo; 2327 struct inode *ip; 2328 ufs2_daddr_t blkno; 2329 int error; 2330 2331 ip = VTOI(vp); 2332 if (bp->b_blkno == bp->b_lblkno) { 2333 error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL); 2334 bp->b_blkno = blkno; 2335 if (error) { 2336 bp->b_error = error; 2337 bp->b_ioflags |= BIO_ERROR; 2338 bufdone(bp); 2339 return (0); 2340 } 2341 if ((long)bp->b_blkno == -1) 2342 vfs_bio_clrbuf(bp); 2343 } 2344 if ((long)bp->b_blkno == -1) { 2345 bufdone(bp); 2346 return (0); 2347 } 2348 bp->b_iooffset = dbtob(bp->b_blkno); 2349 bo = ip->i_umbufobj; 2350 BO_STRATEGY(bo, bp); 2351 return (0); 2352} 2353 2354/* 2355 * Print out the contents of an inode. 2356 */ 2357static int 2358ufs_print(ap) 2359 struct vop_print_args /* { 2360 struct vnode *a_vp; 2361 } */ *ap; 2362{ 2363 struct vnode *vp = ap->a_vp; 2364 struct inode *ip = VTOI(vp); 2365 2366 printf("\tino %lu, on dev %s", (u_long)ip->i_number, 2367 devtoname(ip->i_dev)); 2368 if (vp->v_type == VFIFO) 2369 fifo_printinfo(vp); 2370 printf("\n"); 2371 return (0); 2372} 2373 2374/* 2375 * Close wrapper for fifos. 2376 * 2377 * Update the times on the inode then do device close. 
2378 */ 2379static int 2380ufsfifo_close(ap) 2381 struct vop_close_args /* { 2382 struct vnode *a_vp; 2383 int a_fflag; 2384 struct ucred *a_cred; 2385 struct thread *a_td; 2386 } */ *ap; 2387{ 2388 struct vnode *vp = ap->a_vp; 2389 int usecount; 2390 2391 VI_LOCK(vp); 2392 usecount = vp->v_usecount; 2393 if (usecount > 1) 2394 ufs_itimes_locked(vp); 2395 VI_UNLOCK(vp); 2396 return (fifo_specops.vop_close(ap)); 2397} 2398 2399/* 2400 * Kqfilter wrapper for fifos. 2401 * 2402 * Fall through to ufs kqfilter routines if needed 2403 */ 2404static int 2405ufsfifo_kqfilter(ap) 2406 struct vop_kqfilter_args *ap; 2407{ 2408 int error; 2409 2410 error = fifo_specops.vop_kqfilter(ap); 2411 if (error) 2412 error = vfs_kqfilter(ap); 2413 return (error); 2414} 2415 2416/* 2417 * Return POSIX pathconf information applicable to fifos. 2418 */ 2419static int 2420ufsfifo_pathconf(ap) 2421 struct vop_pathconf_args /* { 2422 struct vnode *a_vp; 2423 int a_name; 2424 int *a_retval; 2425 } */ *ap; 2426{ 2427 2428 switch (ap->a_name) { 2429 case _PC_ACL_EXTENDED: 2430 case _PC_ACL_NFS4: 2431 case _PC_ACL_PATH_MAX: 2432 case _PC_MAC_PRESENT: 2433 return (ufs_pathconf(ap)); 2434 default: 2435 return (fifo_specops.vop_pathconf(ap)); 2436 } 2437 /* NOTREACHED */ 2438} 2439 2440/* 2441 * Return POSIX pathconf information applicable to ufs filesystems. 
2442 */ 2443static int 2444ufs_pathconf(ap) 2445 struct vop_pathconf_args /* { 2446 struct vnode *a_vp; 2447 int a_name; 2448 int *a_retval; 2449 } */ *ap; 2450{ 2451 int error; 2452 2453 error = 0; 2454 switch (ap->a_name) { 2455 case _PC_LINK_MAX: 2456 *ap->a_retval = LINK_MAX; 2457 break; 2458 case _PC_NAME_MAX: 2459 *ap->a_retval = NAME_MAX; 2460 break; 2461 case _PC_PATH_MAX: 2462 *ap->a_retval = PATH_MAX; 2463 break; 2464 case _PC_PIPE_BUF: 2465 *ap->a_retval = PIPE_BUF; 2466 break; 2467 case _PC_CHOWN_RESTRICTED: 2468 *ap->a_retval = 1; 2469 break; 2470 case _PC_NO_TRUNC: 2471 *ap->a_retval = 1; 2472 break; 2473 case _PC_ACL_EXTENDED: 2474#ifdef UFS_ACL 2475 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 2476 *ap->a_retval = 1; 2477 else 2478 *ap->a_retval = 0; 2479#else 2480 *ap->a_retval = 0; 2481#endif 2482 break; 2483 2484 case _PC_ACL_NFS4: 2485#ifdef UFS_ACL 2486 if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS) 2487 *ap->a_retval = 1; 2488 else 2489 *ap->a_retval = 0; 2490#else 2491 *ap->a_retval = 0; 2492#endif 2493 break; 2494 2495 case _PC_ACL_PATH_MAX: 2496#ifdef UFS_ACL 2497 if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) 2498 *ap->a_retval = ACL_MAX_ENTRIES; 2499 else 2500 *ap->a_retval = 3; 2501#else 2502 *ap->a_retval = 3; 2503#endif 2504 break; 2505 case _PC_MAC_PRESENT: 2506#ifdef MAC 2507 if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL) 2508 *ap->a_retval = 1; 2509 else 2510 *ap->a_retval = 0; 2511#else 2512 *ap->a_retval = 0; 2513#endif 2514 break; 2515 case _PC_ASYNC_IO: 2516 /* _PC_ASYNC_IO should have been handled by upper layers. 
*/ 2517 KASSERT(0, ("_PC_ASYNC_IO should not get here")); 2518 error = EINVAL; 2519 break; 2520 case _PC_PRIO_IO: 2521 *ap->a_retval = 0; 2522 break; 2523 case _PC_SYNC_IO: 2524 *ap->a_retval = 0; 2525 break; 2526 case _PC_ALLOC_SIZE_MIN: 2527 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; 2528 break; 2529 case _PC_FILESIZEBITS: 2530 *ap->a_retval = 64; 2531 break; 2532 case _PC_REC_INCR_XFER_SIZE: 2533 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 2534 break; 2535 case _PC_REC_MAX_XFER_SIZE: 2536 *ap->a_retval = -1; /* means ``unlimited'' */ 2537 break; 2538 case _PC_REC_MIN_XFER_SIZE: 2539 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 2540 break; 2541 case _PC_REC_XFER_ALIGN: 2542 *ap->a_retval = PAGE_SIZE; 2543 break; 2544 case _PC_SYMLINK_MAX: 2545 *ap->a_retval = MAXPATHLEN; 2546 break; 2547 2548 default: 2549 error = EINVAL; 2550 break; 2551 } 2552 return (error); 2553} 2554 2555/* 2556 * Initialize the vnode associated with a new inode, handle aliased 2557 * vnodes. 2558 */ 2559int 2560ufs_vinit(mntp, fifoops, vpp) 2561 struct mount *mntp; 2562 struct vop_vector *fifoops; 2563 struct vnode **vpp; 2564{ 2565 struct inode *ip; 2566 struct vnode *vp; 2567 2568 vp = *vpp; 2569 ip = VTOI(vp); 2570 vp->v_type = IFTOVT(ip->i_mode); 2571 if (vp->v_type == VFIFO) 2572 vp->v_op = fifoops; 2573 ASSERT_VOP_LOCKED(vp, "ufs_vinit"); 2574 if (ip->i_number == ROOTINO) 2575 vp->v_vflag |= VV_ROOT; 2576 *vpp = vp; 2577 return (0); 2578} 2579 2580/* 2581 * Allocate a new inode. 2582 * Vnode dvp must be locked. 
2583 */ 2584static int 2585ufs_makeinode(mode, dvp, vpp, cnp) 2586 int mode; 2587 struct vnode *dvp; 2588 struct vnode **vpp; 2589 struct componentname *cnp; 2590{ 2591 struct inode *ip, *pdir; 2592 struct direct newdir; 2593 struct vnode *tvp; 2594 int error; 2595 2596 pdir = VTOI(dvp); 2597#ifdef INVARIANTS 2598 if ((cnp->cn_flags & HASBUF) == 0) 2599 panic("ufs_makeinode: no name"); 2600#endif 2601 *vpp = NULL; 2602 if ((mode & IFMT) == 0) 2603 mode |= IFREG; 2604 2605 if (VTOI(dvp)->i_effnlink < 2) 2606 panic("ufs_makeinode: Bad link count %d on parent", 2607 VTOI(dvp)->i_effnlink); 2608 error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); 2609 if (error) 2610 return (error); 2611 ip = VTOI(tvp); 2612 ip->i_gid = pdir->i_gid; 2613 DIP_SET(ip, i_gid, pdir->i_gid); 2614#ifdef SUIDDIR 2615 { 2616#ifdef QUOTA 2617 struct ucred ucred, *ucp; 2618 gid_t ucred_group; 2619 ucp = cnp->cn_cred; 2620#endif 2621 /* 2622 * If we are not the owner of the directory, 2623 * and we are hacking owners here, (only do this where told to) 2624 * and we are not giving it TO root, (would subvert quotas) 2625 * then go ahead and give it to the other user. 2626 * Note that this drops off the execute bits for security. 2627 */ 2628 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 2629 (pdir->i_mode & ISUID) && 2630 (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { 2631 ip->i_uid = pdir->i_uid; 2632 DIP_SET(ip, i_uid, ip->i_uid); 2633 mode &= ~07111; 2634#ifdef QUOTA 2635 /* 2636 * Make sure the correct user gets charged 2637 * for the space. 2638 * Quickly knock up a dummy credential for the victim. 2639 * XXX This seems to never be accessed out of our 2640 * context so a stack variable is ok. 
2641 */ 2642 refcount_init(&ucred.cr_ref, 1); 2643 ucred.cr_uid = ip->i_uid; 2644 ucred.cr_ngroups = 1; 2645 ucred.cr_groups = &ucred_group; 2646 ucred.cr_groups[0] = pdir->i_gid; 2647 ucp = &ucred; 2648#endif 2649 } else { 2650 ip->i_uid = cnp->cn_cred->cr_uid; 2651 DIP_SET(ip, i_uid, ip->i_uid); 2652 } 2653 2654#ifdef QUOTA 2655 if ((error = getinoquota(ip)) || 2656 (error = chkiq(ip, 1, ucp, 0))) { 2657 if (DOINGSOFTDEP(tvp)) 2658 softdep_revert_link(pdir, ip); 2659 UFS_VFREE(tvp, ip->i_number, mode); 2660 vput(tvp); 2661 return (error); 2662 } 2663#endif 2664 } 2665#else /* !SUIDDIR */ 2666 ip->i_uid = cnp->cn_cred->cr_uid; 2667 DIP_SET(ip, i_uid, ip->i_uid); 2668#ifdef QUOTA 2669 if ((error = getinoquota(ip)) || 2670 (error = chkiq(ip, 1, cnp->cn_cred, 0))) { 2671 if (DOINGSOFTDEP(tvp)) 2672 softdep_revert_link(pdir, ip); 2673 UFS_VFREE(tvp, ip->i_number, mode); 2674 vput(tvp); 2675 return (error); 2676 } 2677#endif 2678#endif /* !SUIDDIR */ 2679 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 2680 ip->i_mode = mode; 2681 DIP_SET(ip, i_mode, mode); 2682 tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ 2683 ip->i_effnlink = 1; 2684 ip->i_nlink = 1; 2685 DIP_SET(ip, i_nlink, 1); 2686 if (DOINGSOFTDEP(tvp)) 2687 softdep_setup_create(VTOI(dvp), ip); 2688 if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && 2689 priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID, 0)) { 2690 ip->i_mode &= ~ISGID; 2691 DIP_SET(ip, i_mode, ip->i_mode); 2692 } 2693 2694 if (cnp->cn_flags & ISWHITEOUT) { 2695 ip->i_flags |= UF_OPAQUE; 2696 DIP_SET(ip, i_flags, ip->i_flags); 2697 } 2698 2699 /* 2700 * Make sure inode goes to disk before directory entry. 
2701 */ 2702 error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp))); 2703 if (error) 2704 goto bad; 2705#ifdef MAC 2706 if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { 2707 error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, 2708 dvp, tvp, cnp); 2709 if (error) 2710 goto bad; 2711 } 2712#endif 2713#ifdef UFS_ACL 2714 if (dvp->v_mount->mnt_flag & MNT_ACLS) { 2715 error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode, 2716 cnp->cn_cred, cnp->cn_thread); 2717 if (error) 2718 goto bad; 2719 } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { 2720 error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode, 2721 cnp->cn_cred, cnp->cn_thread); 2722 if (error) 2723 goto bad; 2724 } 2725#endif /* !UFS_ACL */ 2726 ufs_makedirentry(ip, cnp, &newdir); 2727 error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0); 2728 if (error) 2729 goto bad; 2730 *vpp = tvp; 2731 return (0); 2732 2733bad: 2734 /* 2735 * Write error occurred trying to update the inode 2736 * or the directory so must deallocate the inode. 2737 */ 2738 ip->i_effnlink = 0; 2739 ip->i_nlink = 0; 2740 DIP_SET(ip, i_nlink, 0); 2741 ip->i_flag |= IN_CHANGE; 2742 if (DOINGSOFTDEP(tvp)) 2743 softdep_revert_create(VTOI(dvp), ip); 2744 vput(tvp); 2745 return (error); 2746} 2747 2748/* Global vfs data structures for ufs. 
*/ 2749struct vop_vector ufs_vnodeops = { 2750 .vop_default = &default_vnodeops, 2751 .vop_fsync = VOP_PANIC, 2752 .vop_read = VOP_PANIC, 2753 .vop_reallocblks = VOP_PANIC, 2754 .vop_write = VOP_PANIC, 2755 .vop_accessx = ufs_accessx, 2756 .vop_bmap = ufs_bmap, 2757 .vop_cachedlookup = ufs_lookup, 2758 .vop_close = ufs_close, 2759 .vop_create = ufs_create, 2760 .vop_getattr = ufs_getattr, 2761 .vop_inactive = ufs_inactive, 2762 .vop_link = ufs_link, 2763 .vop_lookup = vfs_cache_lookup, 2764 .vop_markatime = ufs_markatime, 2765 .vop_mkdir = ufs_mkdir, 2766 .vop_mknod = ufs_mknod, 2767 .vop_open = ufs_open, 2768 .vop_pathconf = ufs_pathconf, 2769 .vop_poll = vop_stdpoll, 2770 .vop_print = ufs_print, 2771 .vop_readdir = ufs_readdir, 2772 .vop_readlink = ufs_readlink, 2773 .vop_reclaim = ufs_reclaim, 2774 .vop_remove = ufs_remove, 2775 .vop_rename = ufs_rename, 2776 .vop_rmdir = ufs_rmdir, 2777 .vop_setattr = ufs_setattr, 2778#ifdef MAC 2779 .vop_setlabel = vop_stdsetlabel_ea, 2780#endif 2781 .vop_strategy = ufs_strategy, 2782 .vop_symlink = ufs_symlink, 2783 .vop_whiteout = ufs_whiteout, 2784#ifdef UFS_EXTATTR 2785 .vop_getextattr = ufs_getextattr, 2786 .vop_deleteextattr = ufs_deleteextattr, 2787 .vop_setextattr = ufs_setextattr, 2788#endif 2789#ifdef UFS_ACL 2790 .vop_getacl = ufs_getacl, 2791 .vop_setacl = ufs_setacl, 2792 .vop_aclcheck = ufs_aclcheck, 2793#endif 2794}; 2795 2796struct vop_vector ufs_fifoops = { 2797 .vop_default = &fifo_specops, 2798 .vop_fsync = VOP_PANIC, 2799 .vop_accessx = ufs_accessx, 2800 .vop_close = ufsfifo_close, 2801 .vop_getattr = ufs_getattr, 2802 .vop_inactive = ufs_inactive, 2803 .vop_kqfilter = ufsfifo_kqfilter, 2804 .vop_markatime = ufs_markatime, 2805 .vop_pathconf = ufsfifo_pathconf, 2806 .vop_print = ufs_print, 2807 .vop_read = VOP_PANIC, 2808 .vop_reclaim = ufs_reclaim, 2809 .vop_setattr = ufs_setattr, 2810#ifdef MAC 2811 .vop_setlabel = vop_stdsetlabel_ea, 2812#endif 2813 .vop_write = VOP_PANIC, 2814#ifdef UFS_EXTATTR 2815 
.vop_getextattr = ufs_getextattr, 2816 .vop_deleteextattr = ufs_deleteextattr, 2817 .vop_setextattr = ufs_setextattr, 2818#endif 2819#ifdef UFS_ACL 2820 .vop_getacl = ufs_getacl, 2821 .vop_setacl = ufs_setacl, 2822 .vop_aclcheck = ufs_aclcheck, 2823#endif 2824}; 2825