ufs_vnops.c revision 161473
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: head/sys/ufs/ufs/ufs_vnops.c 161473 2006-08-20 10:52:44Z pjd $"); 39 40#include "opt_mac.h" 41#include "opt_quota.h" 42#include "opt_suiddir.h" 43#include "opt_ufs.h" 44#include "opt_ffs.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/malloc.h> 49#include <sys/namei.h> 50#include <sys/kernel.h> 51#include <sys/fcntl.h> 52#include <sys/stat.h> 53#include <sys/bio.h> 54#include <sys/buf.h> 55#include <sys/mount.h> 56#include <sys/refcount.h> 57#include <sys/unistd.h> 58#include <sys/vnode.h> 59#include <sys/dirent.h> 60#include <sys/lockf.h> 61#include <sys/conf.h> 62#include <sys/acl.h> 63#include <sys/mac.h> 64#include <sys/jail.h> 65 66#include <machine/mutex.h> 67 68#include <sys/file.h> /* XXX */ 69 70#include <vm/vm.h> 71#include <vm/vm_extern.h> 72 73#include <fs/fifofs/fifo.h> 74 75#include <ufs/ufs/acl.h> 76#include <ufs/ufs/extattr.h> 77#include <ufs/ufs/quota.h> 78#include <ufs/ufs/inode.h> 79#include <ufs/ufs/dir.h> 80#include <ufs/ufs/ufsmount.h> 81#include <ufs/ufs/ufs_extern.h> 82#ifdef UFS_DIRHASH 83#include <ufs/ufs/dirhash.h> 84#endif 85 86#include <ufs/ffs/ffs_extern.h> 87 88static vop_access_t ufs_access; 89static vop_advlock_t ufs_advlock; 90static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *); 91static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *); 92static vop_close_t ufs_close; 93static vop_create_t ufs_create; 94static vop_getattr_t ufs_getattr; 95static vop_link_t ufs_link; 96static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); 97static vop_mkdir_t ufs_mkdir; 98static vop_mknod_t ufs_mknod; 99static vop_open_t ufs_open; 100static vop_pathconf_t ufs_pathconf; 101static vop_print_t ufs_print; 102static vop_readlink_t ufs_readlink; 103static vop_remove_t ufs_remove; 104static vop_rename_t ufs_rename; 105static vop_rmdir_t ufs_rmdir; 106static vop_setattr_t ufs_setattr; 107static vop_strategy_t ufs_strategy; 108static vop_symlink_t ufs_symlink; 109static vop_whiteout_t ufs_whiteout; 110static vop_close_t ufsfifo_close; 111static vop_kqfilter_t ufsfifo_kqfilter; 112 113/* 114 * A virgin directory (no blushing please). 115 */ 116static struct dirtemplate mastertemplate = { 117 0, 12, DT_DIR, 1, ".", 118 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." 119}; 120static struct odirtemplate omastertemplate = { 121 0, 12, 1, ".", 122 0, DIRBLKSIZ - 12, 2, ".." 123}; 124 125void 126ufs_itimes(vp) 127 struct vnode *vp; 128{ 129 struct inode *ip; 130 struct timespec ts; 131 132 ip = VTOI(vp); 133 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) 134 return; 135 if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp)) 136 ip->i_flag |= IN_LAZYMOD; 137 else 138 ip->i_flag |= IN_MODIFIED; 139 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 140 vfs_timestamp(&ts); 141 if (ip->i_flag & IN_ACCESS) { 142 DIP_SET(ip, i_atime, ts.tv_sec); 143 DIP_SET(ip, i_atimensec, ts.tv_nsec); 144 } 145 if (ip->i_flag & IN_UPDATE) { 146 DIP_SET(ip, i_mtime, ts.tv_sec); 147 DIP_SET(ip, i_mtimensec, ts.tv_nsec); 148 ip->i_modrev++; 149 } 150 if (ip->i_flag & IN_CHANGE) { 151 DIP_SET(ip, i_ctime, ts.tv_sec); 152 DIP_SET(ip, i_ctimensec, ts.tv_nsec); 153 } 154 } 155 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); 156} 157 158/* 159 * Create a regular file 160 */ 161static int 162ufs_create(ap) 163 struct vop_create_args /* { 164 struct vnode *a_dvp; 165 struct vnode **a_vpp; 166 struct componentname *a_cnp; 167 struct vattr *a_vap; 168 } */ *ap; 169{ 170 int error; 171 172 error = 173 ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 174 ap->a_dvp, ap->a_vpp, ap->a_cnp); 175 if (error) 176 return (error); 177 return (0); 178} 179 180/* 181 * Mknod vnode call 182 */ 183/* ARGSUSED */ 184static int 185ufs_mknod(ap) 186 struct vop_mknod_args /* { 187 struct vnode *a_dvp; 188 struct vnode **a_vpp; 189 struct componentname *a_cnp; 190 struct vattr *a_vap; 191 } */ *ap; 192{ 193 struct vattr *vap = ap->a_vap; 194 struct vnode **vpp = ap->a_vpp; 195 struct inode *ip; 196 ino_t ino; 197 int error; 198 199 error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 200 ap->a_dvp, vpp, ap->a_cnp); 201 if (error) 202 return (error); 203 ip = VTOI(*vpp); 204 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 205 if (vap->va_rdev != VNOVAL) { 206 /* 207 * Want to be able to use this to make badblock 208 * inodes, so don't truncate the dev number. 209 */ 210 DIP_SET(ip, i_rdev, vap->va_rdev); 211 } 212 /* 213 * Remove inode, then reload it through VFS_VGET so it is 214 * checked to see if it is an alias of an existing entry in 215 * the inode cache. XXX I don't believe this is necessary now. 216 */ 217 (*vpp)->v_type = VNON; 218 ino = ip->i_number; /* Save this before vgone() invalidates ip. */ 219 vgone(*vpp); 220 vput(*vpp); 221 error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); 222 if (error) { 223 *vpp = NULL; 224 return (error); 225 } 226 return (0); 227} 228 229/* 230 * Open called. 231 */ 232/* ARGSUSED */ 233static int 234ufs_open(struct vop_open_args *ap) 235{ 236 struct vnode *vp = ap->a_vp; 237 struct inode *ip; 238 239 if (vp->v_type == VCHR || vp->v_type == VBLK) 240 return (EOPNOTSUPP); 241 242 ip = VTOI(vp); 243 /* 244 * Files marked append-only must be opened for appending. 245 */ 246 if ((ip->i_flags & APPEND) && 247 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 248 return (EPERM); 249 vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td); 250 return (0); 251} 252 253/* 254 * Close called. 255 * 256 * Update the times on the inode. 257 */ 258/* ARGSUSED */ 259static int 260ufs_close(ap) 261 struct vop_close_args /* { 262 struct vnode *a_vp; 263 int a_fflag; 264 struct ucred *a_cred; 265 struct thread *a_td; 266 } */ *ap; 267{ 268 struct vnode *vp = ap->a_vp; 269 270 VI_LOCK(vp); 271 if (vp->v_usecount > 1) 272 ufs_itimes(vp); 273 VI_UNLOCK(vp); 274 return (0); 275} 276 277static int 278ufs_access(ap) 279 struct vop_access_args /* { 280 struct vnode *a_vp; 281 int a_mode; 282 struct ucred *a_cred; 283 struct thread *a_td; 284 } */ *ap; 285{ 286 struct vnode *vp = ap->a_vp; 287 struct inode *ip = VTOI(vp); 288 mode_t mode = ap->a_mode; 289 int error; 290#ifdef UFS_ACL 291 struct acl *acl; 292#endif 293 294 /* 295 * Disallow write attempts on read-only filesystems; 296 * unless the file is a socket, fifo, or a block or 297 * character device resident on the filesystem. 298 */ 299 if (mode & VWRITE) { 300 switch (vp->v_type) { 301 case VDIR: 302 case VLNK: 303 case VREG: 304 if (vp->v_mount->mnt_flag & MNT_RDONLY) 305 return (EROFS); 306#ifdef QUOTA 307 if ((error = getinoquota(ip)) != 0) 308 return (error); 309#endif 310 break; 311 default: 312 break; 313 } 314 } 315 316 /* If immutable bit set, nobody gets to write it. */ 317 if ((mode & VWRITE) && (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT))) 318 return (EPERM); 319 320#ifdef UFS_ACL 321 if ((vp->v_mount->mnt_flag & MNT_ACLS) != 0) { 322 acl = uma_zalloc(acl_zone, M_WAITOK); 323 error = VOP_GETACL(vp, ACL_TYPE_ACCESS, acl, ap->a_cred, 324 ap->a_td); 325 switch (error) { 326 case EOPNOTSUPP: 327 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, 328 ip->i_gid, ap->a_mode, ap->a_cred, NULL); 329 break; 330 case 0: 331 error = vaccess_acl_posix1e(vp->v_type, ip->i_uid, 332 ip->i_gid, acl, ap->a_mode, ap->a_cred, NULL); 333 break; 334 default: 335 printf( 336"ufs_access(): Error retrieving ACL on object (%d).\n", 337 error); 338 /* 339 * XXX: Fall back until debugged. Should 340 * eventually possibly log an error, and return 341 * EPERM for safety. 342 */ 343 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, 344 ip->i_gid, ap->a_mode, ap->a_cred, NULL); 345 } 346 uma_zfree(acl_zone, acl); 347 } else 348#endif /* !UFS_ACL */ 349 error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, 350 ap->a_mode, ap->a_cred, NULL); 351 return (error); 352} 353 354/* ARGSUSED */ 355static int 356ufs_getattr(ap) 357 struct vop_getattr_args /* { 358 struct vnode *a_vp; 359 struct vattr *a_vap; 360 struct ucred *a_cred; 361 struct thread *a_td; 362 } */ *ap; 363{ 364 struct vnode *vp = ap->a_vp; 365 struct inode *ip = VTOI(vp); 366 struct vattr *vap = ap->a_vap; 367 368 ufs_itimes(vp); 369 /* 370 * Copy from inode table 371 */ 372 vap->va_fsid = dev2udev(ip->i_dev); 373 vap->va_fileid = ip->i_number; 374 vap->va_mode = ip->i_mode & ~IFMT; 375 vap->va_nlink = ip->i_effnlink; 376 vap->va_uid = ip->i_uid; 377 vap->va_gid = ip->i_gid; 378 if (ip->i_ump->um_fstype == UFS1) { 379 vap->va_rdev = ip->i_din1->di_rdev; 380 vap->va_size = ip->i_din1->di_size; 381 vap->va_atime.tv_sec = ip->i_din1->di_atime; 382 vap->va_atime.tv_nsec = ip->i_din1->di_atimensec; 383 vap->va_mtime.tv_sec = ip->i_din1->di_mtime; 384 vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec; 385 vap->va_ctime.tv_sec = ip->i_din1->di_ctime; 386 vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec; 387 vap->va_birthtime.tv_sec = 0; 388 vap->va_birthtime.tv_nsec = 0; 389 vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks); 390 } else { 391 vap->va_rdev = ip->i_din2->di_rdev; 392 vap->va_size = ip->i_din2->di_size; 393 vap->va_atime.tv_sec = ip->i_din2->di_atime; 394 vap->va_atime.tv_nsec = ip->i_din2->di_atimensec; 395 vap->va_mtime.tv_sec = ip->i_din2->di_mtime; 396 vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec; 397 vap->va_ctime.tv_sec = ip->i_din2->di_ctime; 398 vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec; 399 vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime; 400 vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec; 401 vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks); 402 } 403 vap->va_flags = ip->i_flags; 404 vap->va_gen = ip->i_gen; 405 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 406 vap->va_type = IFTOVT(ip->i_mode); 407 vap->va_filerev = ip->i_modrev; 408 return (0); 409} 410 411/* 412 * Set attribute vnode op. called from several syscalls 413 */ 414static int 415ufs_setattr(ap) 416 struct vop_setattr_args /* { 417 struct vnode *a_vp; 418 struct vattr *a_vap; 419 struct ucred *a_cred; 420 struct thread *a_td; 421 } */ *ap; 422{ 423 struct vattr *vap = ap->a_vap; 424 struct vnode *vp = ap->a_vp; 425 struct inode *ip = VTOI(vp); 426 struct ucred *cred = ap->a_cred; 427 struct thread *td = ap->a_td; 428 int error; 429 430 /* 431 * Check for unsettable attributes. 432 */ 433 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 434 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 435 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || 436 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 437 return (EINVAL); 438 } 439 /* 440 * Mark for update the file's access time for vfs_mark_atime(). 441 * We are doing this here to avoid some of the checks done 442 * below -- this operation is done by request of the kernel and 443 * should bypass some security checks. Things like read-only 444 * checks get handled by other levels (e.g., ffs_update()). 445 */ 446 if (vap->va_vaflags & VA_MARK_ATIME) { 447 ip->i_flag |= IN_ACCESS; 448 return (0); 449 } 450 if (vap->va_flags != VNOVAL) { 451 if (vp->v_mount->mnt_flag & MNT_RDONLY) 452 return (EROFS); 453 /* 454 * Callers may only modify the file flags on objects they 455 * have VADMIN rights for. 456 */ 457 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 458 return (error); 459 /* 460 * Unprivileged processes are not permitted to unset system 461 * flags, or modify flags if any system flags are set. 462 * Privileged non-jail processes may not modify system flags 463 * if securelevel > 0 and any existing system flags are set. 464 * Privileged jail processes behave like privileged non-jail 465 * processes if the security.jail.chflags_allowed sysctl is 466 * is non-zero; otherwise, they behave like unprivileged 467 * processes. 468 */ 469 if (!suser_cred(cred, 470 jail_chflags_allowed ? SUSER_ALLOWJAIL : 0)) { 471 if (ip->i_flags 472 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { 473 error = securelevel_gt(cred, 0); 474 if (error) 475 return (error); 476 } 477 /* Snapshot flag cannot be set or cleared */ 478 if (((vap->va_flags & SF_SNAPSHOT) != 0 && 479 (ip->i_flags & SF_SNAPSHOT) == 0) || 480 ((vap->va_flags & SF_SNAPSHOT) == 0 && 481 (ip->i_flags & SF_SNAPSHOT) != 0)) 482 return (EPERM); 483 ip->i_flags = vap->va_flags; 484 DIP_SET(ip, i_flags, vap->va_flags); 485 } else { 486 if (ip->i_flags 487 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || 488 (vap->va_flags & UF_SETTABLE) != vap->va_flags) 489 return (EPERM); 490 ip->i_flags &= SF_SETTABLE; 491 ip->i_flags |= (vap->va_flags & UF_SETTABLE); 492 DIP_SET(ip, i_flags, ip->i_flags); 493 } 494 ip->i_flag |= IN_CHANGE; 495 if (vap->va_flags & (IMMUTABLE | APPEND)) 496 return (0); 497 } 498 if (ip->i_flags & (IMMUTABLE | APPEND)) 499 return (EPERM); 500 /* 501 * Go through the fields and update iff not VNOVAL. 502 */ 503 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 504 if (vp->v_mount->mnt_flag & MNT_RDONLY) 505 return (EROFS); 506 if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, 507 td)) != 0) 508 return (error); 509 } 510 if (vap->va_size != VNOVAL) { 511 /* 512 * XXX most of the following special cases should be in 513 * callers instead of in N filesystems. The VDIR check 514 * mostly already is. 515 */ 516 switch (vp->v_type) { 517 case VDIR: 518 return (EISDIR); 519 case VLNK: 520 case VREG: 521 /* 522 * Truncation should have an effect in these cases. 523 * Disallow it if the filesystem is read-only or 524 * the file is being snapshotted. 525 */ 526 if (vp->v_mount->mnt_flag & MNT_RDONLY) 527 return (EROFS); 528 if ((ip->i_flags & SF_SNAPSHOT) != 0) 529 return (EPERM); 530 break; 531 default: 532 /* 533 * According to POSIX, the result is unspecified 534 * for file types other than regular files, 535 * directories and shared memory objects. We 536 * don't support shared memory objects in the file 537 * system, and have dubious support for truncating 538 * symlinks. Just ignore the request in other cases. 539 */ 540 return (0); 541 } 542 if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL, 543 cred, td)) != 0) 544 return (error); 545 } 546 if (vap->va_atime.tv_sec != VNOVAL || 547 vap->va_mtime.tv_sec != VNOVAL || 548 vap->va_birthtime.tv_sec != VNOVAL) { 549 if (vp->v_mount->mnt_flag & MNT_RDONLY) 550 return (EROFS); 551 if ((ip->i_flags & SF_SNAPSHOT) != 0) 552 return (EPERM); 553 /* 554 * From utimes(2): 555 * If times is NULL, ... The caller must be the owner of 556 * the file, have permission to write the file, or be the 557 * super-user. 558 * If times is non-NULL, ... The caller must be the owner of 559 * the file or be the super-user. 560 */ 561 if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) && 562 ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 563 (error = VOP_ACCESS(vp, VWRITE, cred, td)))) 564 return (error); 565 if (vap->va_atime.tv_sec != VNOVAL) 566 ip->i_flag |= IN_ACCESS; 567 if (vap->va_mtime.tv_sec != VNOVAL) 568 ip->i_flag |= IN_CHANGE | IN_UPDATE; 569 if (vap->va_birthtime.tv_sec != VNOVAL && 570 ip->i_ump->um_fstype == UFS2) 571 ip->i_flag |= IN_MODIFIED; 572 ufs_itimes(vp); 573 if (vap->va_atime.tv_sec != VNOVAL) { 574 DIP_SET(ip, i_atime, vap->va_atime.tv_sec); 575 DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec); 576 } 577 if (vap->va_mtime.tv_sec != VNOVAL) { 578 DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec); 579 DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec); 580 } 581 if (vap->va_birthtime.tv_sec != VNOVAL && 582 ip->i_ump->um_fstype == UFS2) { 583 ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec; 584 ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec; 585 } 586 error = UFS_UPDATE(vp, 0); 587 if (error) 588 return (error); 589 } 590 error = 0; 591 if (vap->va_mode != (mode_t)VNOVAL) { 592 if (vp->v_mount->mnt_flag & MNT_RDONLY) 593 return (EROFS); 594 if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode & 595 (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH))) 596 return (EPERM); 597 error = ufs_chmod(vp, (int)vap->va_mode, cred, td); 598 } 599 return (error); 600} 601 602/* 603 * Change the mode on a file. 604 * Inode must be locked before calling. 605 */ 606static int 607ufs_chmod(vp, mode, cred, td) 608 struct vnode *vp; 609 int mode; 610 struct ucred *cred; 611 struct thread *td; 612{ 613 struct inode *ip = VTOI(vp); 614 int error; 615 616 /* 617 * To modify the permissions on a file, must possess VADMIN 618 * for that file. 619 */ 620 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 621 return (error); 622 /* 623 * Privileged processes may set the sticky bit on non-directories, 624 * as well as set the setgid bit on a file with a group that the 625 * process is not a member of. Both of these are allowed in 626 * jail(8). 627 */ 628 if (vp->v_type != VDIR && (mode & S_ISTXT)) { 629 if (suser_cred(cred, SUSER_ALLOWJAIL)) 630 return (EFTYPE); 631 } 632 if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { 633 error = suser_cred(cred, SUSER_ALLOWJAIL); 634 if (error) 635 return (error); 636 } 637 ip->i_mode &= ~ALLPERMS; 638 ip->i_mode |= (mode & ALLPERMS); 639 DIP_SET(ip, i_mode, ip->i_mode); 640 ip->i_flag |= IN_CHANGE; 641 return (0); 642} 643 644/* 645 * Perform chown operation on inode ip; 646 * inode must be locked prior to call. 647 */ 648static int 649ufs_chown(vp, uid, gid, cred, td) 650 struct vnode *vp; 651 uid_t uid; 652 gid_t gid; 653 struct ucred *cred; 654 struct thread *td; 655{ 656 struct inode *ip = VTOI(vp); 657 uid_t ouid; 658 gid_t ogid; 659 int error = 0; 660#ifdef QUOTA 661 int i; 662 ufs2_daddr_t change; 663#endif 664 665 if (uid == (uid_t)VNOVAL) 666 uid = ip->i_uid; 667 if (gid == (gid_t)VNOVAL) 668 gid = ip->i_gid; 669 /* 670 * To modify the ownership of a file, must possess VADMIN 671 * for that file. 672 */ 673 if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) 674 return (error); 675 /* 676 * To change the owner of a file, or change the group of a file 677 * to a group of which we are not a member, the caller must 678 * have privilege. 679 */ 680 if ((uid != ip->i_uid || 681 (gid != ip->i_gid && !groupmember(gid, cred))) && 682 (error = suser_cred(cred, SUSER_ALLOWJAIL))) 683 return (error); 684 ogid = ip->i_gid; 685 ouid = ip->i_uid; 686#ifdef QUOTA 687 if ((error = getinoquota(ip)) != 0) 688 return (error); 689 if (ouid == uid) { 690 dqrele(vp, ip->i_dquot[USRQUOTA]); 691 ip->i_dquot[USRQUOTA] = NODQUOT; 692 } 693 if (ogid == gid) { 694 dqrele(vp, ip->i_dquot[GRPQUOTA]); 695 ip->i_dquot[GRPQUOTA] = NODQUOT; 696 } 697 change = DIP(ip, i_blocks); 698 (void) chkdq(ip, -change, cred, CHOWN); 699 (void) chkiq(ip, -1, cred, CHOWN); 700 for (i = 0; i < MAXQUOTAS; i++) { 701 dqrele(vp, ip->i_dquot[i]); 702 ip->i_dquot[i] = NODQUOT; 703 } 704#endif 705 ip->i_gid = gid; 706 DIP_SET(ip, i_gid, gid); 707 ip->i_uid = uid; 708 DIP_SET(ip, i_uid, uid); 709#ifdef QUOTA 710 if ((error = getinoquota(ip)) == 0) { 711 if (ouid == uid) { 712 dqrele(vp, ip->i_dquot[USRQUOTA]); 713 ip->i_dquot[USRQUOTA] = NODQUOT; 714 } 715 if (ogid == gid) { 716 dqrele(vp, ip->i_dquot[GRPQUOTA]); 717 ip->i_dquot[GRPQUOTA] = NODQUOT; 718 } 719 if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { 720 if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) 721 goto good; 722 else 723 (void) chkdq(ip, -change, cred, CHOWN|FORCE); 724 } 725 for (i = 0; i < MAXQUOTAS; i++) { 726 dqrele(vp, ip->i_dquot[i]); 727 ip->i_dquot[i] = NODQUOT; 728 } 729 } 730 ip->i_gid = ogid; 731 DIP_SET(ip, i_gid, ogid); 732 ip->i_uid = ouid; 733 DIP_SET(ip, i_uid, ouid); 734 if (getinoquota(ip) == 0) { 735 if (ouid == uid) { 736 dqrele(vp, ip->i_dquot[USRQUOTA]); 737 ip->i_dquot[USRQUOTA] = NODQUOT; 738 } 739 if (ogid == gid) { 740 dqrele(vp, ip->i_dquot[GRPQUOTA]); 741 ip->i_dquot[GRPQUOTA] = NODQUOT; 742 } 743 (void) chkdq(ip, change, cred, FORCE|CHOWN); 744 (void) chkiq(ip, 1, cred, FORCE|CHOWN); 745 (void) getinoquota(ip); 746 } 747 return (error); 748good: 749 if (getinoquota(ip)) 750 panic("ufs_chown: lost quota"); 751#endif /* QUOTA */ 752 ip->i_flag |= IN_CHANGE; 753 if (suser_cred(cred, SUSER_ALLOWJAIL) && (ouid != uid || ogid != gid)) { 754 ip->i_mode &= ~(ISUID | ISGID); 755 DIP_SET(ip, i_mode, ip->i_mode); 756 } 757 return (0); 758} 759 760static int 761ufs_remove(ap) 762 struct vop_remove_args /* { 763 struct vnode *a_dvp; 764 struct vnode *a_vp; 765 struct componentname *a_cnp; 766 } */ *ap; 767{ 768 struct inode *ip; 769 struct vnode *vp = ap->a_vp; 770 struct vnode *dvp = ap->a_dvp; 771 int error; 772 struct thread *td; 773 774 td = curthread; 775 ip = VTOI(vp); 776 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 777 (VTOI(dvp)->i_flags & APPEND)) { 778 error = EPERM; 779 goto out; 780 } 781 error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); 782 if (ip->i_nlink <= 0) 783 vp->v_vflag |= VV_NOSYNC; 784 if ((ip->i_flags & SF_SNAPSHOT) != 0) { 785 /* 786 * Avoid deadlock where another thread is trying to 787 * update the inodeblock for dvp and is waiting on 788 * snaplk. Temporary unlock the vnode lock for the 789 * unlinked file and sync the directory. This should 790 * allow vput() of the directory to not block later on 791 * while holding the snapshot vnode locked, assuming 792 * that the directory hasn't been unlinked too. 793 */ 794 VOP_UNLOCK(vp, 0, td); 795 (void) VOP_FSYNC(dvp, MNT_WAIT, td); 796 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 797 } 798out: 799 return (error); 800} 801 802/* 803 * link vnode call 804 */ 805static int 806ufs_link(ap) 807 struct vop_link_args /* { 808 struct vnode *a_tdvp; 809 struct vnode *a_vp; 810 struct componentname *a_cnp; 811 } */ *ap; 812{ 813 struct vnode *vp = ap->a_vp; 814 struct vnode *tdvp = ap->a_tdvp; 815 struct componentname *cnp = ap->a_cnp; 816 struct inode *ip; 817 struct direct newdir; 818 int error; 819 820#ifdef DIAGNOSTIC 821 if ((cnp->cn_flags & HASBUF) == 0) 822 panic("ufs_link: no name"); 823#endif 824 if (tdvp->v_mount != vp->v_mount) { 825 error = EXDEV; 826 goto out; 827 } 828 ip = VTOI(vp); 829 if ((nlink_t)ip->i_nlink >= LINK_MAX) { 830 error = EMLINK; 831 goto out; 832 } 833 if (ip->i_flags & (IMMUTABLE | APPEND)) { 834 error = EPERM; 835 goto out; 836 } 837 ip->i_effnlink++; 838 ip->i_nlink++; 839 DIP_SET(ip, i_nlink, ip->i_nlink); 840 ip->i_flag |= IN_CHANGE; 841 if (DOINGSOFTDEP(vp)) 842 softdep_change_linkcnt(ip); 843 error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp))); 844 if (!error) { 845 ufs_makedirentry(ip, cnp, &newdir); 846 error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL); 847 } 848 849 if (error) { 850 ip->i_effnlink--; 851 ip->i_nlink--; 852 DIP_SET(ip, i_nlink, ip->i_nlink); 853 ip->i_flag |= IN_CHANGE; 854 if (DOINGSOFTDEP(vp)) 855 softdep_change_linkcnt(ip); 856 } 857out: 858 return (error); 859} 860 861/* 862 * whiteout vnode call 863 */ 864static int 865ufs_whiteout(ap) 866 struct vop_whiteout_args /* { 867 struct vnode *a_dvp; 868 struct componentname *a_cnp; 869 int a_flags; 870 } */ *ap; 871{ 872 struct vnode *dvp = ap->a_dvp; 873 struct componentname *cnp = ap->a_cnp; 874 struct direct newdir; 875 int error = 0; 876 877 switch (ap->a_flags) { 878 case LOOKUP: 879 /* 4.4 format directories support whiteout operations */ 880 if (dvp->v_mount->mnt_maxsymlinklen > 0) 881 return (0); 882 return (EOPNOTSUPP); 883 884 case CREATE: 885 /* create a new directory whiteout */ 886#ifdef DIAGNOSTIC 887 if ((cnp->cn_flags & SAVENAME) == 0) 888 panic("ufs_whiteout: missing name"); 889 if (dvp->v_mount->mnt_maxsymlinklen <= 0) 890 panic("ufs_whiteout: old format filesystem"); 891#endif 892 893 newdir.d_ino = WINO; 894 newdir.d_namlen = cnp->cn_namelen; 895 bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); 896 newdir.d_type = DT_WHT; 897 error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL); 898 break; 899 900 case DELETE: 901 /* remove an existing directory whiteout */ 902#ifdef DIAGNOSTIC 903 if (dvp->v_mount->mnt_maxsymlinklen <= 0) 904 panic("ufs_whiteout: old format filesystem"); 905#endif 906 907 cnp->cn_flags &= ~DOWHITEOUT; 908 error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); 909 break; 910 default: 911 panic("ufs_whiteout: unknown op"); 912 } 913 return (error); 914} 915 916/* 917 * Rename system call. 918 * rename("foo", "bar"); 919 * is essentially 920 * unlink("bar"); 921 * link("foo", "bar"); 922 * unlink("foo"); 923 * but ``atomically''. Can't do full commit without saving state in the 924 * inode on disk which isn't feasible at this time. Best we can do is 925 * always guarantee the target exists. 926 * 927 * Basic algorithm is: 928 * 929 * 1) Bump link count on source while we're linking it to the 930 * target. This also ensure the inode won't be deleted out 931 * from underneath us while we work (it may be truncated by 932 * a concurrent `trunc' or `open' for creation). 933 * 2) Link source to destination. If destination already exists, 934 * delete it first. 935 * 3) Unlink source reference to inode if still around. If a 936 * directory was moved and the parent of the destination 937 * is different from the source, patch the ".." entry in the 938 * directory. 939 */ 940static int 941ufs_rename(ap) 942 struct vop_rename_args /* { 943 struct vnode *a_fdvp; 944 struct vnode *a_fvp; 945 struct componentname *a_fcnp; 946 struct vnode *a_tdvp; 947 struct vnode *a_tvp; 948 struct componentname *a_tcnp; 949 } */ *ap; 950{ 951 struct vnode *tvp = ap->a_tvp; 952 struct vnode *tdvp = ap->a_tdvp; 953 struct vnode *fvp = ap->a_fvp; 954 struct vnode *fdvp = ap->a_fdvp; 955 struct componentname *tcnp = ap->a_tcnp; 956 struct componentname *fcnp = ap->a_fcnp; 957 struct thread *td = fcnp->cn_thread; 958 struct inode *ip, *xp, *dp; 959 struct direct newdir; 960 int doingdirectory = 0, oldparent = 0, newparent = 0; 961 int error = 0, ioflag; 962 963#ifdef DIAGNOSTIC 964 if ((tcnp->cn_flags & HASBUF) == 0 || 965 (fcnp->cn_flags & HASBUF) == 0) 966 panic("ufs_rename: no name"); 967#endif 968 /* 969 * Check for cross-device rename. 970 */ 971 if ((fvp->v_mount != tdvp->v_mount) || 972 (tvp && (fvp->v_mount != tvp->v_mount))) { 973 error = EXDEV; 974abortit: 975 if (tdvp == tvp) 976 vrele(tdvp); 977 else 978 vput(tdvp); 979 if (tvp) 980 vput(tvp); 981 vrele(fdvp); 982 vrele(fvp); 983 return (error); 984 } 985 986 if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 987 (VTOI(tdvp)->i_flags & APPEND))) { 988 error = EPERM; 989 goto abortit; 990 } 991 992 /* 993 * Renaming a file to itself has no effect. The upper layers should 994 * not call us in that case. Temporarily just warn if they do. 995 */ 996 if (fvp == tvp) { 997 printf("ufs_rename: fvp == tvp (can't happen)\n"); 998 error = 0; 999 goto abortit; 1000 } 1001 1002 if ((error = vn_lock(fvp, LK_EXCLUSIVE, td)) != 0) 1003 goto abortit; 1004 dp = VTOI(fdvp); 1005 ip = VTOI(fvp); 1006 if (ip->i_nlink >= LINK_MAX) { 1007 VOP_UNLOCK(fvp, 0, td); 1008 error = EMLINK; 1009 goto abortit; 1010 } 1011 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) 1012 || (dp->i_flags & APPEND)) { 1013 VOP_UNLOCK(fvp, 0, td); 1014 error = EPERM; 1015 goto abortit; 1016 } 1017 if ((ip->i_mode & IFMT) == IFDIR) { 1018 /* 1019 * Avoid ".", "..", and aliases of "." for obvious reasons. 1020 */ 1021 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || 1022 dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || 1023 (ip->i_flag & IN_RENAME)) { 1024 VOP_UNLOCK(fvp, 0, td); 1025 error = EINVAL; 1026 goto abortit; 1027 } 1028 ip->i_flag |= IN_RENAME; 1029 oldparent = dp->i_number; 1030 doingdirectory = 1; 1031 } 1032 vrele(fdvp); 1033 1034 /* 1035 * When the target exists, both the directory 1036 * and target vnodes are returned locked. 1037 */ 1038 dp = VTOI(tdvp); 1039 xp = NULL; 1040 if (tvp) 1041 xp = VTOI(tvp); 1042 1043 /* 1044 * 1) Bump link count while we're moving stuff 1045 * around. If we crash somewhere before 1046 * completing our work, the link count 1047 * may be wrong, but correctable. 1048 */ 1049 ip->i_effnlink++; 1050 ip->i_nlink++; 1051 DIP_SET(ip, i_nlink, ip->i_nlink); 1052 ip->i_flag |= IN_CHANGE; 1053 if (DOINGSOFTDEP(fvp)) 1054 softdep_change_linkcnt(ip); 1055 if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | 1056 DOINGASYNC(fvp)))) != 0) { 1057 VOP_UNLOCK(fvp, 0, td); 1058 goto bad; 1059 } 1060 1061 /* 1062 * If ".." must be changed (ie the directory gets a new 1063 * parent) then the source directory must not be in the 1064 * directory heirarchy above the target, as this would 1065 * orphan everything below the source directory. Also 1066 * the user must have write permission in the source so 1067 * as to be able to change "..". We must repeat the call 1068 * to namei, as the parent directory is unlocked by the 1069 * call to checkpath(). 1070 */ 1071 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); 1072 VOP_UNLOCK(fvp, 0, td); 1073 if (oldparent != dp->i_number) 1074 newparent = dp->i_number; 1075 if (doingdirectory && newparent) { 1076 if (error) /* write access check above */ 1077 goto bad; 1078 if (xp != NULL) 1079 vput(tvp); 1080 error = ufs_checkpath(ip, dp, tcnp->cn_cred); 1081 if (error) 1082 goto out; 1083 if ((tcnp->cn_flags & SAVESTART) == 0) 1084 panic("ufs_rename: lost to startdir"); 1085 VREF(tdvp); 1086 error = relookup(tdvp, &tvp, tcnp); 1087 if (error) 1088 goto out; 1089 vrele(tdvp); 1090 dp = VTOI(tdvp); 1091 xp = NULL; 1092 if (tvp) 1093 xp = VTOI(tvp); 1094 } 1095 /* 1096 * 2) If target doesn't exist, link the target 1097 * to the source and unlink the source. 1098 * Otherwise, rewrite the target directory 1099 * entry to reference the source inode and 1100 * expunge the original entry's existence. 1101 */ 1102 if (xp == NULL) { 1103 if (dp->i_dev != ip->i_dev) 1104 panic("ufs_rename: EXDEV"); 1105 /* 1106 * Account for ".." in new directory. 1107 * When source and destination have the same 1108 * parent we don't fool with the link count. 1109 */ 1110 if (doingdirectory && newparent) { 1111 if ((nlink_t)dp->i_nlink >= LINK_MAX) { 1112 error = EMLINK; 1113 goto bad; 1114 } 1115 dp->i_effnlink++; 1116 dp->i_nlink++; 1117 DIP_SET(dp, i_nlink, dp->i_nlink); 1118 dp->i_flag |= IN_CHANGE; 1119 if (DOINGSOFTDEP(tdvp)) 1120 softdep_change_linkcnt(dp); 1121 error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) | 1122 DOINGASYNC(tdvp))); 1123 if (error) 1124 goto bad; 1125 } 1126 ufs_makedirentry(ip, tcnp, &newdir); 1127 error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL); 1128 if (error) { 1129 if (doingdirectory && newparent) { 1130 dp->i_effnlink--; 1131 dp->i_nlink--; 1132 DIP_SET(dp, i_nlink, dp->i_nlink); 1133 dp->i_flag |= IN_CHANGE; 1134 if (DOINGSOFTDEP(tdvp)) 1135 softdep_change_linkcnt(dp); 1136 (void)UFS_UPDATE(tdvp, 1); 1137 } 1138 goto bad; 1139 } 1140 vput(tdvp); 1141 } else { 1142 if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) 1143 panic("ufs_rename: EXDEV"); 1144 /* 1145 * Short circuit rename(foo, foo). 1146 */ 1147 if (xp->i_number == ip->i_number) 1148 panic("ufs_rename: same file"); 1149 /* 1150 * If the parent directory is "sticky", then the caller 1151 * must possess VADMIN for the parent directory, or the 1152 * destination of the rename. This implements append-only 1153 * directories. 1154 */ 1155 if ((dp->i_mode & S_ISTXT) && 1156 VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) && 1157 VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) { 1158 error = EPERM; 1159 goto bad; 1160 } 1161 /* 1162 * Target must be empty if a directory and have no links 1163 * to it. Also, ensure source and target are compatible 1164 * (both directories, or both not directories). 1165 */ 1166 if ((xp->i_mode&IFMT) == IFDIR) { 1167 if ((xp->i_effnlink > 2) || 1168 !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) { 1169 error = ENOTEMPTY; 1170 goto bad; 1171 } 1172 if (!doingdirectory) { 1173 error = ENOTDIR; 1174 goto bad; 1175 } 1176 cache_purge(tdvp); 1177 } else if (doingdirectory) { 1178 error = EISDIR; 1179 goto bad; 1180 } 1181 error = ufs_dirrewrite(dp, xp, ip->i_number, 1182 IFTODT(ip->i_mode), 1183 (doingdirectory && newparent) ? newparent : doingdirectory); 1184 if (error) 1185 goto bad; 1186 if (doingdirectory) { 1187 if (!newparent) { 1188 dp->i_effnlink--; 1189 if (DOINGSOFTDEP(tdvp)) 1190 softdep_change_linkcnt(dp); 1191 } 1192 xp->i_effnlink--; 1193 if (DOINGSOFTDEP(tvp)) 1194 softdep_change_linkcnt(xp); 1195 } 1196 if (doingdirectory && !DOINGSOFTDEP(tvp)) { 1197 /* 1198 * Truncate inode. The only stuff left in the directory 1199 * is "." and "..". The "." reference is inconsequential 1200 * since we are quashing it. We have removed the "." 1201 * reference and the reference in the parent directory, 1202 * but there may be other hard links. The soft 1203 * dependency code will arrange to do these operations 1204 * after the parent directory entry has been deleted on 1205 * disk, so when running with that code we avoid doing 1206 * them now. 1207 */ 1208 if (!newparent) { 1209 dp->i_nlink--; 1210 DIP_SET(dp, i_nlink, dp->i_nlink); 1211 dp->i_flag |= IN_CHANGE; 1212 } 1213 xp->i_nlink--; 1214 DIP_SET(xp, i_nlink, xp->i_nlink); 1215 xp->i_flag |= IN_CHANGE; 1216 ioflag = IO_NORMAL; 1217 if (DOINGASYNC(tvp)) 1218 ioflag |= IO_SYNC; 1219 if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag, 1220 tcnp->cn_cred, tcnp->cn_thread)) != 0) 1221 goto bad; 1222 } 1223 vput(tdvp); 1224 vput(tvp); 1225 xp = NULL; 1226 } 1227 1228 /* 1229 * 3) Unlink the source. 1230 */ 1231 fcnp->cn_flags &= ~MODMASK; 1232 fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; 1233 if ((fcnp->cn_flags & SAVESTART) == 0) 1234 panic("ufs_rename: lost from startdir"); 1235 VREF(fdvp); 1236 error = relookup(fdvp, &fvp, fcnp); 1237 if (error == 0) 1238 vrele(fdvp); 1239 if (fvp != NULL) { 1240 xp = VTOI(fvp); 1241 dp = VTOI(fdvp); 1242 } else { 1243 /* 1244 * From name has disappeared. IN_RENAME is not sufficient 1245 * to protect against directory races due to timing windows, 1246 * so we have to remove the panic. XXX the only real way 1247 * to solve this issue is at a much higher level. By the 1248 * time we hit ufs_rename() it's too late. 1249 */ 1250#if 0 1251 if (doingdirectory) 1252 panic("ufs_rename: lost dir entry"); 1253#endif 1254 vrele(ap->a_fvp); 1255 return (0); 1256 } 1257 /* 1258 * Ensure that the directory entry still exists and has not 1259 * changed while the new name has been entered. If the source is 1260 * a file then the entry may have been unlinked or renamed. In 1261 * either case there is no further work to be done. If the source 1262 * is a directory then it cannot have been rmdir'ed; the IN_RENAME 1263 * flag ensures that it cannot be moved by another rename or removed 1264 * by a rmdir. 1265 */ 1266 if (xp != ip) { 1267 /* 1268 * From name resolves to a different inode. IN_RENAME is 1269 * not sufficient protection against timing window races 1270 * so we can't panic here. XXX the only real way 1271 * to solve this issue is at a much higher level. By the 1272 * time we hit ufs_rename() it's too late. 1273 */ 1274#if 0 1275 if (doingdirectory) 1276 panic("ufs_rename: lost dir entry"); 1277#endif 1278 } else { 1279 /* 1280 * If the source is a directory with a 1281 * new parent, the link count of the old 1282 * parent directory must be decremented 1283 * and ".." set to point to the new parent. 1284 */ 1285 if (doingdirectory && newparent) { 1286 xp->i_offset = mastertemplate.dot_reclen; 1287 ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0); 1288 cache_purge(fdvp); 1289 } 1290 error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); 1291 xp->i_flag &= ~IN_RENAME; 1292 } 1293 if (dp) 1294 vput(fdvp); 1295 if (xp) 1296 vput(fvp); 1297 vrele(ap->a_fvp); 1298 return (error); 1299 1300bad: 1301 if (xp) 1302 vput(ITOV(xp)); 1303 vput(ITOV(dp)); 1304out: 1305 if (doingdirectory) 1306 ip->i_flag &= ~IN_RENAME; 1307 if (vn_lock(fvp, LK_EXCLUSIVE, td) == 0) { 1308 ip->i_effnlink--; 1309 ip->i_nlink--; 1310 DIP_SET(ip, i_nlink, ip->i_nlink); 1311 ip->i_flag |= IN_CHANGE; 1312 ip->i_flag &= ~IN_RENAME; 1313 if (DOINGSOFTDEP(fvp)) 1314 softdep_change_linkcnt(ip); 1315 vput(fvp); 1316 } else 1317 vrele(fvp); 1318 return (error); 1319} 1320 1321/* 1322 * Mkdir system call 1323 */ 1324static int 1325ufs_mkdir(ap) 1326 struct vop_mkdir_args /* { 1327 struct vnode *a_dvp; 1328 struct vnode **a_vpp; 1329 struct componentname *a_cnp; 1330 struct vattr *a_vap; 1331 } */ *ap; 1332{ 1333 struct vnode *dvp = ap->a_dvp; 1334 struct vattr *vap = ap->a_vap; 1335 struct componentname *cnp = ap->a_cnp; 1336 struct inode *ip, *dp; 1337 struct vnode *tvp; 1338 struct buf *bp; 1339 struct dirtemplate dirtemplate, *dtp; 1340 struct direct newdir; 1341#ifdef UFS_ACL 1342 struct acl *acl, *dacl; 1343#endif 1344 int error, dmode; 1345 long blkoff; 1346 1347#ifdef DIAGNOSTIC 1348 if ((cnp->cn_flags & HASBUF) == 0) 1349 panic("ufs_mkdir: no name"); 1350#endif 1351 dp = VTOI(dvp); 1352 if ((nlink_t)dp->i_nlink >= LINK_MAX) { 1353 error = EMLINK; 1354 goto out; 1355 } 1356 dmode = vap->va_mode & 0777; 1357 dmode |= IFDIR; 1358 /* 1359 * Must simulate part of ufs_makeinode here to acquire the inode, 1360 * but not have it entered in the parent directory. The entry is 1361 * made later after writing "." and ".." entries. 1362 */ 1363 error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); 1364 if (error) 1365 goto out; 1366 ip = VTOI(tvp); 1367 ip->i_gid = dp->i_gid; 1368 DIP_SET(ip, i_gid, dp->i_gid); 1369#ifdef SUIDDIR 1370 { 1371#ifdef QUOTA 1372 struct ucred ucred, *ucp; 1373 ucp = cnp->cn_cred; 1374#endif 1375 /* 1376 * If we are hacking owners here, (only do this where told to) 1377 * and we are not giving it TO root, (would subvert quotas) 1378 * then go ahead and give it to the other user. 1379 * The new directory also inherits the SUID bit. 1380 * If user's UID and dir UID are the same, 1381 * 'give it away' so that the SUID is still forced on. 1382 */ 1383 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 1384 (dp->i_mode & ISUID) && dp->i_uid) { 1385 dmode |= ISUID; 1386 ip->i_uid = dp->i_uid; 1387 DIP_SET(ip, i_uid, dp->i_uid); 1388#ifdef QUOTA 1389 if (dp->i_uid != cnp->cn_cred->cr_uid) { 1390 /* 1391 * Make sure the correct user gets charged 1392 * for the space. 1393 * Make a dummy credential for the victim. 1394 * XXX This seems to never be accessed out of 1395 * our context so a stack variable is ok. 1396 */ 1397 refcount_init(&ucred.cr_ref, 1); 1398 ucred.cr_uid = ip->i_uid; 1399 ucred.cr_ngroups = 1; 1400 ucred.cr_groups[0] = dp->i_gid; 1401 ucp = &ucred; 1402 } 1403#endif 1404 } else { 1405 ip->i_uid = cnp->cn_cred->cr_uid; 1406 DIP_SET(ip, i_uid, ip->i_uid); 1407 } 1408#ifdef QUOTA 1409 if ((error = getinoquota(ip)) || 1410 (error = chkiq(ip, 1, ucp, 0))) { 1411 UFS_VFREE(tvp, ip->i_number, dmode); 1412 vput(tvp); 1413 return (error); 1414 } 1415#endif 1416 } 1417#else /* !SUIDDIR */ 1418 ip->i_uid = cnp->cn_cred->cr_uid; 1419 DIP_SET(ip, i_uid, ip->i_uid); 1420#ifdef QUOTA 1421 if ((error = getinoquota(ip)) || 1422 (error = chkiq(ip, 1, cnp->cn_cred, 0))) { 1423 UFS_VFREE(tvp, ip->i_number, dmode); 1424 vput(tvp); 1425 return (error); 1426 } 1427#endif 1428#endif /* !SUIDDIR */ 1429 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1430#ifdef UFS_ACL 1431 acl = dacl = NULL; 1432 if ((dvp->v_mount->mnt_flag & MNT_ACLS) != 0) { 1433 acl = uma_zalloc(acl_zone, M_WAITOK); 1434 dacl = uma_zalloc(acl_zone, M_WAITOK); 1435 1436 /* 1437 * Retrieve default ACL from parent, if any. 1438 */ 1439 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cnp->cn_cred, 1440 cnp->cn_thread); 1441 switch (error) { 1442 case 0: 1443 /* 1444 * Retrieved a default ACL, so merge mode and ACL if 1445 * necessary. If the ACL is empty, fall through to 1446 * the "not defined or available" case. 1447 */ 1448 if (acl->acl_cnt != 0) { 1449 dmode = acl_posix1e_newfilemode(dmode, acl); 1450 ip->i_mode = dmode; 1451 DIP_SET(ip, i_mode, dmode); 1452 *dacl = *acl; 1453 ufs_sync_acl_from_inode(ip, acl); 1454 break; 1455 } 1456 /* FALLTHROUGH */ 1457 1458 case EOPNOTSUPP: 1459 /* 1460 * Just use the mode as-is. 1461 */ 1462 ip->i_mode = dmode; 1463 DIP_SET(ip, i_mode, dmode); 1464 uma_zfree(acl_zone, acl); 1465 uma_zfree(acl_zone, dacl); 1466 dacl = acl = NULL; 1467 break; 1468 1469 default: 1470 UFS_VFREE(tvp, ip->i_number, dmode); 1471 vput(tvp); 1472 uma_zfree(acl_zone, acl); 1473 uma_zfree(acl_zone, dacl); 1474 return (error); 1475 } 1476 } else { 1477#endif /* !UFS_ACL */ 1478 ip->i_mode = dmode; 1479 DIP_SET(ip, i_mode, dmode); 1480#ifdef UFS_ACL 1481 } 1482#endif 1483 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ 1484 ip->i_effnlink = 2; 1485 ip->i_nlink = 2; 1486 DIP_SET(ip, i_nlink, 2); 1487 if (DOINGSOFTDEP(tvp)) 1488 softdep_change_linkcnt(ip); 1489 if (cnp->cn_flags & ISWHITEOUT) { 1490 ip->i_flags |= UF_OPAQUE; 1491 DIP_SET(ip, i_flags, ip->i_flags); 1492 } 1493 1494 /* 1495 * Bump link count in parent directory to reflect work done below. 1496 * Should be done before reference is created so cleanup is 1497 * possible if we crash. 1498 */ 1499 dp->i_effnlink++; 1500 dp->i_nlink++; 1501 DIP_SET(dp, i_nlink, dp->i_nlink); 1502 dp->i_flag |= IN_CHANGE; 1503 if (DOINGSOFTDEP(dvp)) 1504 softdep_change_linkcnt(dp); 1505 error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp))); 1506 if (error) 1507 goto bad; 1508#ifdef MAC 1509 if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { 1510 error = mac_create_vnode_extattr(cnp->cn_cred, dvp->v_mount, 1511 dvp, tvp, cnp); 1512 if (error) 1513 goto bad; 1514 } 1515#endif 1516#ifdef UFS_ACL 1517 if (acl != NULL) { 1518 /* 1519 * XXX: If we abort now, will Soft Updates notify the extattr 1520 * code that the EAs for the file need to be released? 1521 */ 1522 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cnp->cn_cred, 1523 cnp->cn_thread); 1524 if (error == 0) 1525 error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, 1526 cnp->cn_cred, cnp->cn_thread); 1527 switch (error) { 1528 case 0: 1529 break; 1530 1531 case EOPNOTSUPP: 1532 /* 1533 * XXX: This should not happen, as EOPNOTSUPP above 1534 * was supposed to free acl. 1535 */ 1536 printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); 1537 /* 1538 panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()"); 1539 */ 1540 break; 1541 1542 default: 1543 uma_zfree(acl_zone, acl); 1544 uma_zfree(acl_zone, dacl); 1545 dacl = acl = NULL; 1546 goto bad; 1547 } 1548 uma_zfree(acl_zone, acl); 1549 uma_zfree(acl_zone, dacl); 1550 dacl = acl = NULL; 1551 } 1552#endif /* !UFS_ACL */ 1553 1554 /* 1555 * Initialize directory with "." and ".." from static template. 1556 */ 1557 if (dvp->v_mount->mnt_maxsymlinklen > 0) 1558 dtp = &mastertemplate; 1559 else 1560 dtp = (struct dirtemplate *)&omastertemplate; 1561 dirtemplate = *dtp; 1562 dirtemplate.dot_ino = ip->i_number; 1563 dirtemplate.dotdot_ino = dp->i_number; 1564 if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, 1565 BA_CLRBUF, &bp)) != 0) 1566 goto bad; 1567 ip->i_size = DIRBLKSIZ; 1568 DIP_SET(ip, i_size, DIRBLKSIZ); 1569 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1570 vnode_pager_setsize(tvp, (u_long)ip->i_size); 1571 bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); 1572 if (DOINGSOFTDEP(tvp)) { 1573 /* 1574 * Ensure that the entire newly allocated block is a 1575 * valid directory so that future growth within the 1576 * block does not have to ensure that the block is 1577 * written before the inode. 1578 */ 1579 blkoff = DIRBLKSIZ; 1580 while (blkoff < bp->b_bcount) { 1581 ((struct direct *) 1582 (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; 1583 blkoff += DIRBLKSIZ; 1584 } 1585 } 1586 if ((error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | 1587 DOINGASYNC(tvp)))) != 0) { 1588 (void)bwrite(bp); 1589 goto bad; 1590 } 1591 /* 1592 * Directory set up, now install its entry in the parent directory. 1593 * 1594 * If we are not doing soft dependencies, then we must write out the 1595 * buffer containing the new directory body before entering the new 1596 * name in the parent. If we are doing soft dependencies, then the 1597 * buffer containing the new directory body will be passed to and 1598 * released in the soft dependency code after the code has attached 1599 * an appropriate ordering dependency to the buffer which ensures that 1600 * the buffer is written before the new name is written in the parent. 1601 */ 1602 if (DOINGASYNC(dvp)) 1603 bdwrite(bp); 1604 else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp)))) 1605 goto bad; 1606 ufs_makedirentry(ip, cnp, &newdir); 1607 error = ufs_direnter(dvp, tvp, &newdir, cnp, bp); 1608 1609bad: 1610 if (error == 0) { 1611 *ap->a_vpp = tvp; 1612 } else { 1613#ifdef UFS_ACL 1614 if (acl != NULL) 1615 uma_zfree(acl_zone, acl); 1616 if (dacl != NULL) 1617 uma_zfree(acl_zone, dacl); 1618#endif 1619 dp->i_effnlink--; 1620 dp->i_nlink--; 1621 DIP_SET(dp, i_nlink, dp->i_nlink); 1622 dp->i_flag |= IN_CHANGE; 1623 if (DOINGSOFTDEP(dvp)) 1624 softdep_change_linkcnt(dp); 1625 /* 1626 * No need to do an explicit VOP_TRUNCATE here, vrele will 1627 * do this for us because we set the link count to 0. 1628 */ 1629 ip->i_effnlink = 0; 1630 ip->i_nlink = 0; 1631 DIP_SET(ip, i_nlink, 0); 1632 ip->i_flag |= IN_CHANGE; 1633 if (DOINGSOFTDEP(tvp)) 1634 softdep_change_linkcnt(ip); 1635 vput(tvp); 1636 } 1637out: 1638 return (error); 1639} 1640 1641/* 1642 * Rmdir system call. 1643 */ 1644static int 1645ufs_rmdir(ap) 1646 struct vop_rmdir_args /* { 1647 struct vnode *a_dvp; 1648 struct vnode *a_vp; 1649 struct componentname *a_cnp; 1650 } */ *ap; 1651{ 1652 struct vnode *vp = ap->a_vp; 1653 struct vnode *dvp = ap->a_dvp; 1654 struct componentname *cnp = ap->a_cnp; 1655 struct inode *ip, *dp; 1656 int error, ioflag; 1657 1658 ip = VTOI(vp); 1659 dp = VTOI(dvp); 1660 1661 /* 1662 * Do not remove a directory that is in the process of being renamed. 1663 * Verify the directory is empty (and valid). Rmdir ".." will not be 1664 * valid since ".." will contain a reference to the current directory 1665 * and thus be non-empty. Do not allow the removal of mounted on 1666 * directories (this can happen when an NFS exported filesystem 1667 * tries to remove a locally mounted on directory). 1668 */ 1669 error = 0; 1670 if ((ip->i_flag & IN_RENAME) || ip->i_effnlink < 2) { 1671 error = EINVAL; 1672 goto out; 1673 } 1674 if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { 1675 error = ENOTEMPTY; 1676 goto out; 1677 } 1678 if ((dp->i_flags & APPEND) 1679 || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { 1680 error = EPERM; 1681 goto out; 1682 } 1683 if (vp->v_mountedhere != 0) { 1684 error = EINVAL; 1685 goto out; 1686 } 1687 /* 1688 * Delete reference to directory before purging 1689 * inode. If we crash in between, the directory 1690 * will be reattached to lost+found, 1691 */ 1692 dp->i_effnlink--; 1693 ip->i_effnlink--; 1694 if (DOINGSOFTDEP(vp)) { 1695 softdep_change_linkcnt(dp); 1696 softdep_change_linkcnt(ip); 1697 } 1698 error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); 1699 if (error) { 1700 dp->i_effnlink++; 1701 ip->i_effnlink++; 1702 if (DOINGSOFTDEP(vp)) { 1703 softdep_change_linkcnt(dp); 1704 softdep_change_linkcnt(ip); 1705 } 1706 goto out; 1707 } 1708 cache_purge(dvp); 1709 /* 1710 * Truncate inode. The only stuff left in the directory is "." and 1711 * "..". The "." reference is inconsequential since we are quashing 1712 * it. The soft dependency code will arrange to do these operations 1713 * after the parent directory entry has been deleted on disk, so 1714 * when running with that code we avoid doing them now. 1715 */ 1716 if (!DOINGSOFTDEP(vp)) { 1717 dp->i_nlink--; 1718 DIP_SET(dp, i_nlink, dp->i_nlink); 1719 dp->i_flag |= IN_CHANGE; 1720 ip->i_nlink--; 1721 DIP_SET(ip, i_nlink, ip->i_nlink); 1722 ip->i_flag |= IN_CHANGE; 1723 ioflag = IO_NORMAL; 1724 if (DOINGASYNC(vp)) 1725 ioflag |= IO_SYNC; 1726 error = UFS_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred, 1727 cnp->cn_thread); 1728 } 1729 cache_purge(vp); 1730#ifdef UFS_DIRHASH 1731 /* Kill any active hash; i_effnlink == 0, so it will not come back. */ 1732 if (ip->i_dirhash != NULL) 1733 ufsdirhash_free(ip); 1734#endif 1735out: 1736 return (error); 1737} 1738 1739/* 1740 * symlink -- make a symbolic link 1741 */ 1742static int 1743ufs_symlink(ap) 1744 struct vop_symlink_args /* { 1745 struct vnode *a_dvp; 1746 struct vnode **a_vpp; 1747 struct componentname *a_cnp; 1748 struct vattr *a_vap; 1749 char *a_target; 1750 } */ *ap; 1751{ 1752 struct vnode *vp, **vpp = ap->a_vpp; 1753 struct inode *ip; 1754 int len, error; 1755 1756 error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, 1757 vpp, ap->a_cnp); 1758 if (error) 1759 return (error); 1760 vp = *vpp; 1761 len = strlen(ap->a_target); 1762 if (len < vp->v_mount->mnt_maxsymlinklen) { 1763 ip = VTOI(vp); 1764 bcopy(ap->a_target, SHORTLINK(ip), len); 1765 ip->i_size = len; 1766 DIP_SET(ip, i_size, len); 1767 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1768 } else 1769 error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, 1770 UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, 1771 ap->a_cnp->cn_cred, NOCRED, (int *)0, (struct thread *)0); 1772 if (error) 1773 vput(vp); 1774 return (error); 1775} 1776 1777/* 1778 * Vnode op for reading directories. 1779 * 1780 * The routine below assumes that the on-disk format of a directory 1781 * is the same as that defined by <sys/dirent.h>. If the on-disk 1782 * format changes, then it will be necessary to do a conversion 1783 * from the on-disk format that read returns to the format defined 1784 * by <sys/dirent.h>. 1785 */ 1786int 1787ufs_readdir(ap) 1788 struct vop_readdir_args /* { 1789 struct vnode *a_vp; 1790 struct uio *a_uio; 1791 struct ucred *a_cred; 1792 int *a_eofflag; 1793 int *a_ncookies; 1794 u_long **a_cookies; 1795 } */ *ap; 1796{ 1797 struct uio *uio = ap->a_uio; 1798 int error; 1799 size_t count, lost; 1800 off_t off; 1801 1802 if (ap->a_ncookies != NULL) 1803 /* 1804 * Ensure that the block is aligned. The caller can use 1805 * the cookies to determine where in the block to start. 1806 */ 1807 uio->uio_offset &= ~(DIRBLKSIZ - 1); 1808 off = uio->uio_offset; 1809 count = uio->uio_resid; 1810 /* Make sure we don't return partial entries. */ 1811 if (count <= ((uio->uio_offset + count) & (DIRBLKSIZ -1))) 1812 return (EINVAL); 1813 count -= (uio->uio_offset + count) & (DIRBLKSIZ -1); 1814 lost = uio->uio_resid - count; 1815 uio->uio_resid = count; 1816 uio->uio_iov->iov_len = count; 1817# if (BYTE_ORDER == LITTLE_ENDIAN) 1818 if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) { 1819 error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); 1820 } else { 1821 struct dirent *dp, *edp; 1822 struct uio auio; 1823 struct iovec aiov; 1824 caddr_t dirbuf; 1825 int readcnt; 1826 u_char tmp; 1827 1828 auio = *uio; 1829 auio.uio_iov = &aiov; 1830 auio.uio_iovcnt = 1; 1831 auio.uio_segflg = UIO_SYSSPACE; 1832 aiov.iov_len = count; 1833 MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK); 1834 aiov.iov_base = dirbuf; 1835 error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); 1836 if (error == 0) { 1837 readcnt = count - auio.uio_resid; 1838 edp = (struct dirent *)&dirbuf[readcnt]; 1839 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 1840 tmp = dp->d_namlen; 1841 dp->d_namlen = dp->d_type; 1842 dp->d_type = tmp; 1843 if (dp->d_reclen > 0) { 1844 dp = (struct dirent *) 1845 ((char *)dp + dp->d_reclen); 1846 } else { 1847 error = EIO; 1848 break; 1849 } 1850 } 1851 if (dp >= edp) 1852 error = uiomove(dirbuf, readcnt, uio); 1853 } 1854 FREE(dirbuf, M_TEMP); 1855 } 1856# else 1857 error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); 1858# endif 1859 if (!error && ap->a_ncookies != NULL) { 1860 struct dirent* dpStart; 1861 struct dirent* dpEnd; 1862 struct dirent* dp; 1863 int ncookies; 1864 u_long *cookies; 1865 u_long *cookiep; 1866 1867 if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 1868 panic("ufs_readdir: unexpected uio from NFS server"); 1869 dpStart = (struct dirent *) 1870 ((char *)uio->uio_iov->iov_base - (uio->uio_offset - off)); 1871 dpEnd = (struct dirent *) uio->uio_iov->iov_base; 1872 for (dp = dpStart, ncookies = 0; 1873 dp < dpEnd; 1874 dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) 1875 ncookies++; 1876 MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, 1877 M_WAITOK); 1878 for (dp = dpStart, cookiep = cookies; 1879 dp < dpEnd; 1880 dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) { 1881 off += dp->d_reclen; 1882 *cookiep++ = (u_long) off; 1883 } 1884 *ap->a_ncookies = ncookies; 1885 *ap->a_cookies = cookies; 1886 } 1887 uio->uio_resid += lost; 1888 if (ap->a_eofflag) 1889 *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset; 1890 return (error); 1891} 1892 1893/* 1894 * Return target name of a symbolic link 1895 */ 1896static int 1897ufs_readlink(ap) 1898 struct vop_readlink_args /* { 1899 struct vnode *a_vp; 1900 struct uio *a_uio; 1901 struct ucred *a_cred; 1902 } */ *ap; 1903{ 1904 struct vnode *vp = ap->a_vp; 1905 struct inode *ip = VTOI(vp); 1906 doff_t isize; 1907 1908 isize = ip->i_size; 1909 if ((isize < vp->v_mount->mnt_maxsymlinklen) || 1910 DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */ 1911 return (uiomove(SHORTLINK(ip), isize, ap->a_uio)); 1912 } 1913 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); 1914} 1915 1916/* 1917 * Calculate the logical to physical mapping if not done already, 1918 * then call the device strategy routine. 1919 * 1920 * In order to be able to swap to a file, the ufs_bmaparray() operation may not 1921 * deadlock on memory. See ufs_bmap() for details. 1922 */ 1923static int 1924ufs_strategy(ap) 1925 struct vop_strategy_args /* { 1926 struct vnode *a_vp; 1927 struct buf *a_bp; 1928 } */ *ap; 1929{ 1930 struct buf *bp = ap->a_bp; 1931 struct vnode *vp = ap->a_vp; 1932 struct bufobj *bo; 1933 struct inode *ip; 1934 ufs2_daddr_t blkno; 1935 int error; 1936 1937 ip = VTOI(vp); 1938 if (bp->b_blkno == bp->b_lblkno) { 1939 error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL); 1940 bp->b_blkno = blkno; 1941 if (error) { 1942 bp->b_error = error; 1943 bp->b_ioflags |= BIO_ERROR; 1944 bufdone(bp); 1945 return (error); 1946 } 1947 if ((long)bp->b_blkno == -1) 1948 vfs_bio_clrbuf(bp); 1949 } 1950 if ((long)bp->b_blkno == -1) { 1951 bufdone(bp); 1952 return (0); 1953 } 1954 bp->b_iooffset = dbtob(bp->b_blkno); 1955 bo = ip->i_umbufobj; 1956 BO_STRATEGY(bo, bp); 1957 return (0); 1958} 1959 1960/* 1961 * Print out the contents of an inode. 1962 */ 1963static int 1964ufs_print(ap) 1965 struct vop_print_args /* { 1966 struct vnode *a_vp; 1967 } */ *ap; 1968{ 1969 struct vnode *vp = ap->a_vp; 1970 struct inode *ip = VTOI(vp); 1971 1972 printf("\tino %lu, on dev %s", (u_long)ip->i_number, 1973 devtoname(ip->i_dev)); 1974 if (vp->v_type == VFIFO) 1975 fifo_printinfo(vp); 1976 printf("\n"); 1977 return (0); 1978} 1979 1980/* 1981 * Close wrapper for fifos. 1982 * 1983 * Update the times on the inode then do device close. 1984 */ 1985static int 1986ufsfifo_close(ap) 1987 struct vop_close_args /* { 1988 struct vnode *a_vp; 1989 int a_fflag; 1990 struct ucred *a_cred; 1991 struct thread *a_td; 1992 } */ *ap; 1993{ 1994 struct vnode *vp = ap->a_vp; 1995 1996 VI_LOCK(vp); 1997 if (vp->v_usecount > 1) 1998 ufs_itimes(vp); 1999 VI_UNLOCK(vp); 2000 return (fifo_specops.vop_close(ap)); 2001} 2002 2003/* 2004 * Kqfilter wrapper for fifos. 2005 * 2006 * Fall through to ufs kqfilter routines if needed 2007 */ 2008static int 2009ufsfifo_kqfilter(ap) 2010 struct vop_kqfilter_args *ap; 2011{ 2012 int error; 2013 2014 error = fifo_specops.vop_kqfilter(ap); 2015 if (error) 2016 error = vfs_kqfilter(ap); 2017 return (error); 2018} 2019 2020/* 2021 * Return POSIX pathconf information applicable to ufs filesystems. 2022 */ 2023static int 2024ufs_pathconf(ap) 2025 struct vop_pathconf_args /* { 2026 struct vnode *a_vp; 2027 int a_name; 2028 int *a_retval; 2029 } */ *ap; 2030{ 2031 int error; 2032 2033 error = 0; 2034 switch (ap->a_name) { 2035 case _PC_LINK_MAX: 2036 *ap->a_retval = LINK_MAX; 2037 break; 2038 case _PC_NAME_MAX: 2039 *ap->a_retval = NAME_MAX; 2040 break; 2041 case _PC_PATH_MAX: 2042 *ap->a_retval = PATH_MAX; 2043 break; 2044 case _PC_PIPE_BUF: 2045 *ap->a_retval = PIPE_BUF; 2046 break; 2047 case _PC_CHOWN_RESTRICTED: 2048 *ap->a_retval = 1; 2049 break; 2050 case _PC_NO_TRUNC: 2051 *ap->a_retval = 1; 2052 break; 2053 case _PC_ACL_EXTENDED: 2054#ifdef UFS_ACL 2055 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 2056 *ap->a_retval = 1; 2057 else 2058 *ap->a_retval = 0; 2059#else 2060 *ap->a_retval = 0; 2061#endif 2062 break; 2063 case _PC_ACL_PATH_MAX: 2064#ifdef UFS_ACL 2065 if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) 2066 *ap->a_retval = ACL_MAX_ENTRIES; 2067 else 2068 *ap->a_retval = 3; 2069#else 2070 *ap->a_retval = 3; 2071#endif 2072 break; 2073 case _PC_MAC_PRESENT: 2074#ifdef MAC 2075 if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL) 2076 *ap->a_retval = 1; 2077 else 2078 *ap->a_retval = 0; 2079#else 2080 *ap->a_retval = 0; 2081#endif 2082 break; 2083 case _PC_ASYNC_IO: 2084 /* _PC_ASYNC_IO should have been handled by upper layers. */ 2085 KASSERT(0, ("_PC_ASYNC_IO should not get here")); 2086 error = EINVAL; 2087 break; 2088 case _PC_PRIO_IO: 2089 *ap->a_retval = 0; 2090 break; 2091 case _PC_SYNC_IO: 2092 *ap->a_retval = 0; 2093 break; 2094 case _PC_ALLOC_SIZE_MIN: 2095 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; 2096 break; 2097 case _PC_FILESIZEBITS: 2098 *ap->a_retval = 64; 2099 break; 2100 case _PC_REC_INCR_XFER_SIZE: 2101 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 2102 break; 2103 case _PC_REC_MAX_XFER_SIZE: 2104 *ap->a_retval = -1; /* means ``unlimited'' */ 2105 break; 2106 case _PC_REC_MIN_XFER_SIZE: 2107 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 2108 break; 2109 case _PC_REC_XFER_ALIGN: 2110 *ap->a_retval = PAGE_SIZE; 2111 break; 2112 case _PC_SYMLINK_MAX: 2113 *ap->a_retval = MAXPATHLEN; 2114 break; 2115 2116 default: 2117 error = EINVAL; 2118 break; 2119 } 2120 return (error); 2121} 2122 2123/* 2124 * Advisory record locking support 2125 */ 2126static int 2127ufs_advlock(ap) 2128 struct vop_advlock_args /* { 2129 struct vnode *a_vp; 2130 caddr_t a_id; 2131 int a_op; 2132 struct flock *a_fl; 2133 int a_flags; 2134 } */ *ap; 2135{ 2136 struct inode *ip = VTOI(ap->a_vp); 2137 2138 return (lf_advlock(ap, &(ip->i_lockf), ip->i_size)); 2139} 2140 2141/* 2142 * Initialize the vnode associated with a new inode, handle aliased 2143 * vnodes. 2144 */ 2145int 2146ufs_vinit(mntp, fifoops, vpp) 2147 struct mount *mntp; 2148 struct vop_vector *fifoops; 2149 struct vnode **vpp; 2150{ 2151 struct inode *ip; 2152 struct vnode *vp; 2153 2154 vp = *vpp; 2155 ip = VTOI(vp); 2156 vp->v_type = IFTOVT(ip->i_mode); 2157 if (vp->v_type == VFIFO) 2158 vp->v_op = fifoops; 2159 ASSERT_VOP_LOCKED(vp, "ufs_vinit"); 2160 if (ip->i_number == ROOTINO) 2161 vp->v_vflag |= VV_ROOT; 2162 ip->i_modrev = init_va_filerev(); 2163 *vpp = vp; 2164 return (0); 2165} 2166 2167/* 2168 * Allocate a new inode. 2169 * Vnode dvp must be locked. 2170 */ 2171static int 2172ufs_makeinode(mode, dvp, vpp, cnp) 2173 int mode; 2174 struct vnode *dvp; 2175 struct vnode **vpp; 2176 struct componentname *cnp; 2177{ 2178 struct inode *ip, *pdir; 2179 struct direct newdir; 2180 struct vnode *tvp; 2181#ifdef UFS_ACL 2182 struct acl *acl; 2183#endif 2184 int error; 2185 2186 pdir = VTOI(dvp); 2187#ifdef DIAGNOSTIC 2188 if ((cnp->cn_flags & HASBUF) == 0) 2189 panic("ufs_makeinode: no name"); 2190#endif 2191 *vpp = NULL; 2192 if ((mode & IFMT) == 0) 2193 mode |= IFREG; 2194 2195 error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); 2196 if (error) 2197 return (error); 2198 ip = VTOI(tvp); 2199 ip->i_gid = pdir->i_gid; 2200 DIP_SET(ip, i_gid, pdir->i_gid); 2201#ifdef SUIDDIR 2202 { 2203#ifdef QUOTA 2204 struct ucred ucred, *ucp; 2205 ucp = cnp->cn_cred; 2206#endif 2207 /* 2208 * If we are not the owner of the directory, 2209 * and we are hacking owners here, (only do this where told to) 2210 * and we are not giving it TO root, (would subvert quotas) 2211 * then go ahead and give it to the other user. 2212 * Note that this drops off the execute bits for security. 2213 */ 2214 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && 2215 (pdir->i_mode & ISUID) && 2216 (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { 2217 ip->i_uid = pdir->i_uid; 2218 DIP_SET(ip, i_uid, ip->i_uid); 2219 mode &= ~07111; 2220#ifdef QUOTA 2221 /* 2222 * Make sure the correct user gets charged 2223 * for the space. 2224 * Quickly knock up a dummy credential for the victim. 2225 * XXX This seems to never be accessed out of our 2226 * context so a stack variable is ok. 2227 */ 2228 refcount_init(&ucred.cr_ref, 1); 2229 ucred.cr_uid = ip->i_uid; 2230 ucred.cr_ngroups = 1; 2231 ucred.cr_groups[0] = pdir->i_gid; 2232 ucp = &ucred; 2233#endif 2234 } else { 2235 ip->i_uid = cnp->cn_cred->cr_uid; 2236 DIP_SET(ip, i_uid, ip->i_uid); 2237 } 2238 2239#ifdef QUOTA 2240 if ((error = getinoquota(ip)) || 2241 (error = chkiq(ip, 1, ucp, 0))) { 2242 UFS_VFREE(tvp, ip->i_number, mode); 2243 vput(tvp); 2244 return (error); 2245 } 2246#endif 2247 } 2248#else /* !SUIDDIR */ 2249 ip->i_uid = cnp->cn_cred->cr_uid; 2250 DIP_SET(ip, i_uid, ip->i_uid); 2251#ifdef QUOTA 2252 if ((error = getinoquota(ip)) || 2253 (error = chkiq(ip, 1, cnp->cn_cred, 0))) { 2254 UFS_VFREE(tvp, ip->i_number, mode); 2255 vput(tvp); 2256 return (error); 2257 } 2258#endif 2259#endif /* !SUIDDIR */ 2260 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 2261#ifdef UFS_ACL 2262 acl = NULL; 2263 if ((dvp->v_mount->mnt_flag & MNT_ACLS) != 0) { 2264 acl = uma_zalloc(acl_zone, M_WAITOK); 2265 2266 /* 2267 * Retrieve default ACL for parent, if any. 2268 */ 2269 error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cnp->cn_cred, 2270 cnp->cn_thread); 2271 switch (error) { 2272 case 0: 2273 /* 2274 * Retrieved a default ACL, so merge mode and ACL if 2275 * necessary. 2276 */ 2277 if (acl->acl_cnt != 0) { 2278 /* 2279 * Two possible ways for default ACL to not 2280 * be present. First, the EA can be 2281 * undefined, or second, the default ACL can 2282 * be blank. If it's blank, fall through to 2283 * the it's not defined case. 2284 */ 2285 mode = acl_posix1e_newfilemode(mode, acl); 2286 ip->i_mode = mode; 2287 DIP_SET(ip, i_mode, mode); 2288 ufs_sync_acl_from_inode(ip, acl); 2289 break; 2290 } 2291 /* FALLTHROUGH */ 2292 2293 case EOPNOTSUPP: 2294 /* 2295 * Just use the mode as-is. 2296 */ 2297 ip->i_mode = mode; 2298 DIP_SET(ip, i_mode, mode); 2299 uma_zfree(acl_zone, acl); 2300 acl = NULL; 2301 break; 2302 2303 default: 2304 UFS_VFREE(tvp, ip->i_number, mode); 2305 vput(tvp); 2306 uma_zfree(acl_zone, acl); 2307 acl = NULL; 2308 return (error); 2309 } 2310 } else { 2311#endif 2312 ip->i_mode = mode; 2313 DIP_SET(ip, i_mode, mode); 2314#ifdef UFS_ACL 2315 } 2316#endif 2317 tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ 2318 ip->i_effnlink = 1; 2319 ip->i_nlink = 1; 2320 DIP_SET(ip, i_nlink, 1); 2321 if (DOINGSOFTDEP(tvp)) 2322 softdep_change_linkcnt(ip); 2323 if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && 2324 suser_cred(cnp->cn_cred, SUSER_ALLOWJAIL)) { 2325 ip->i_mode &= ~ISGID; 2326 DIP_SET(ip, i_mode, ip->i_mode); 2327 } 2328 2329 if (cnp->cn_flags & ISWHITEOUT) { 2330 ip->i_flags |= UF_OPAQUE; 2331 DIP_SET(ip, i_flags, ip->i_flags); 2332 } 2333 2334 /* 2335 * Make sure inode goes to disk before directory entry. 2336 */ 2337 error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp))); 2338 if (error) 2339 goto bad; 2340#ifdef MAC 2341 if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { 2342 error = mac_create_vnode_extattr(cnp->cn_cred, dvp->v_mount, 2343 dvp, tvp, cnp); 2344 if (error) 2345 goto bad; 2346 } 2347#endif 2348#ifdef UFS_ACL 2349 if (acl != NULL) { 2350 /* 2351 * XXX: If we abort now, will Soft Updates notify the extattr 2352 * code that the EAs for the file need to be released? 2353 */ 2354 error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cnp->cn_cred, 2355 cnp->cn_thread); 2356 switch (error) { 2357 case 0: 2358 break; 2359 2360 case EOPNOTSUPP: 2361 /* 2362 * XXX: This should not happen, as EOPNOTSUPP above was 2363 * supposed to free acl. 2364 */ 2365 printf("ufs_makeinode: VOP_GETACL() but no " 2366 "VOP_SETACL()\n"); 2367 /* panic("ufs_makeinode: VOP_GETACL() but no " 2368 "VOP_SETACL()"); */ 2369 break; 2370 2371 default: 2372 uma_zfree(acl_zone, acl); 2373 goto bad; 2374 } 2375 uma_zfree(acl_zone, acl); 2376 } 2377#endif /* !UFS_ACL */ 2378 ufs_makedirentry(ip, cnp, &newdir); 2379 error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL); 2380 if (error) 2381 goto bad; 2382 *vpp = tvp; 2383 return (0); 2384 2385bad: 2386 /* 2387 * Write error occurred trying to update the inode 2388 * or the directory so must deallocate the inode. 2389 */ 2390 ip->i_effnlink = 0; 2391 ip->i_nlink = 0; 2392 DIP_SET(ip, i_nlink, 0); 2393 ip->i_flag |= IN_CHANGE; 2394 if (DOINGSOFTDEP(tvp)) 2395 softdep_change_linkcnt(ip); 2396 vput(tvp); 2397 return (error); 2398} 2399 2400/* Global vfs data structures for ufs. */ 2401struct vop_vector ufs_vnodeops = { 2402 .vop_default = &default_vnodeops, 2403 .vop_fsync = VOP_PANIC, 2404 .vop_read = VOP_PANIC, 2405 .vop_reallocblks = VOP_PANIC, 2406 .vop_write = VOP_PANIC, 2407 .vop_access = ufs_access, 2408 .vop_advlock = ufs_advlock, 2409 .vop_bmap = ufs_bmap, 2410 .vop_cachedlookup = ufs_lookup, 2411 .vop_close = ufs_close, 2412 .vop_create = ufs_create, 2413 .vop_getattr = ufs_getattr, 2414 .vop_inactive = ufs_inactive, 2415 .vop_link = ufs_link, 2416 .vop_lookup = vfs_cache_lookup, 2417 .vop_mkdir = ufs_mkdir, 2418 .vop_mknod = ufs_mknod, 2419 .vop_open = ufs_open, 2420 .vop_pathconf = ufs_pathconf, 2421 .vop_poll = vop_stdpoll, 2422 .vop_print = ufs_print, 2423 .vop_readdir = ufs_readdir, 2424 .vop_readlink = ufs_readlink, 2425 .vop_reclaim = ufs_reclaim, 2426 .vop_remove = ufs_remove, 2427 .vop_rename = ufs_rename, 2428 .vop_rmdir = ufs_rmdir, 2429 .vop_setattr = ufs_setattr, 2430#ifdef MAC 2431 .vop_setlabel = vop_stdsetlabel_ea, 2432#endif 2433 .vop_strategy = ufs_strategy, 2434 .vop_symlink = ufs_symlink, 2435 .vop_whiteout = ufs_whiteout, 2436#ifdef UFS_EXTATTR 2437 .vop_getextattr = ufs_getextattr, 2438 .vop_deleteextattr = ufs_deleteextattr, 2439 .vop_setextattr = ufs_setextattr, 2440#endif 2441#ifdef UFS_ACL 2442 .vop_getacl = ufs_getacl, 2443 .vop_setacl = ufs_setacl, 2444 .vop_aclcheck = ufs_aclcheck, 2445#endif 2446}; 2447 2448struct vop_vector ufs_fifoops = { 2449 .vop_default = &fifo_specops, 2450 .vop_fsync = VOP_PANIC, 2451 .vop_access = ufs_access, 2452 .vop_close = ufsfifo_close, 2453 .vop_getattr = ufs_getattr, 2454 .vop_inactive = ufs_inactive, 2455 .vop_kqfilter = ufsfifo_kqfilter, 2456 .vop_print = ufs_print, 2457 .vop_read = VOP_PANIC, 2458 .vop_reclaim = ufs_reclaim, 2459 .vop_setattr = ufs_setattr, 2460#ifdef MAC 2461 .vop_setlabel = vop_stdsetlabel_ea, 2462#endif 2463 .vop_write = VOP_PANIC, 2464#ifdef UFS_EXTATTR 2465 .vop_getextattr = ufs_getextattr, 2466 .vop_deleteextattr = ufs_deleteextattr, 2467 .vop_setextattr = ufs_setextattr, 2468#endif 2469#ifdef UFS_ACL 2470 .vop_getacl = ufs_getacl, 2471 .vop_setacl = ufs_setacl, 2472 .vop_aclcheck = ufs_aclcheck, 2473#endif 2474}; 2475