/*	$NetBSD: ufs_vnops.c,v 1.206 2011/11/18 21:18:52 christos Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
64 * 65 * @(#)ufs_vnops.c 8.28 (Berkeley) 7/31/95 66 */ 67 68#include <sys/cdefs.h> 69__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.206 2011/11/18 21:18:52 christos Exp $"); 70 71#if defined(_KERNEL_OPT) 72#include "opt_ffs.h" 73#include "opt_quota.h" 74#endif 75 76#include <sys/param.h> 77#include <sys/systm.h> 78#include <sys/namei.h> 79#include <sys/resourcevar.h> 80#include <sys/kernel.h> 81#include <sys/file.h> 82#include <sys/stat.h> 83#include <sys/buf.h> 84#include <sys/proc.h> 85#include <sys/mount.h> 86#include <sys/vnode.h> 87#include <sys/kmem.h> 88#include <sys/malloc.h> 89#include <sys/dirent.h> 90#include <sys/lockf.h> 91#include <sys/kauth.h> 92#include <sys/wapbl.h> 93#include <sys/fstrans.h> 94 95#include <miscfs/specfs/specdev.h> 96#include <miscfs/fifofs/fifo.h> 97#include <miscfs/genfs/genfs.h> 98 99#include <ufs/ufs/inode.h> 100#include <ufs/ufs/dir.h> 101#include <ufs/ufs/ufsmount.h> 102#include <ufs/ufs/ufs_bswap.h> 103#include <ufs/ufs/ufs_extern.h> 104#include <ufs/ufs/ufs_wapbl.h> 105#ifdef UFS_DIRHASH 106#include <ufs/ufs/dirhash.h> 107#endif 108#include <ufs/ext2fs/ext2fs_extern.h> 109#include <ufs/ext2fs/ext2fs_dir.h> 110#include <ufs/ffs/ffs_extern.h> 111#include <ufs/lfs/lfs_extern.h> 112#include <ufs/lfs/lfs.h> 113 114#include <uvm/uvm.h> 115 116__CTASSERT(EXT2FS_MAXNAMLEN == FFS_MAXNAMLEN); 117__CTASSERT(LFS_MAXNAMLEN == FFS_MAXNAMLEN); 118 119static int ufs_chmod(struct vnode *, int, kauth_cred_t, struct lwp *); 120static int ufs_chown(struct vnode *, uid_t, gid_t, kauth_cred_t, 121 struct lwp *); 122 123/* 124 * A virgin directory (no blushing please). 125 */ 126static const struct dirtemplate mastertemplate = { 127 0, 12, DT_DIR, 1, ".", 128 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." 
129}; 130 131/* 132 * Create a regular file 133 */ 134int 135ufs_create(void *v) 136{ 137 struct vop_create_args /* { 138 struct vnode *a_dvp; 139 struct vnode **a_vpp; 140 struct componentname *a_cnp; 141 struct vattr *a_vap; 142 } */ *ap = v; 143 int error; 144 struct vnode *dvp = ap->a_dvp; 145 struct ufs_lookup_results *ulr; 146 147 /* XXX should handle this material another way */ 148 ulr = &VTOI(dvp)->i_crap; 149 UFS_CHECK_CRAPCOUNTER(VTOI(dvp)); 150 151 /* 152 * UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful 153 * ufs_makeinode 154 */ 155 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 156 error = 157 ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 158 dvp, ulr, ap->a_vpp, ap->a_cnp); 159 if (error) { 160 fstrans_done(dvp->v_mount); 161 return (error); 162 } 163 UFS_WAPBL_END1(dvp->v_mount, dvp); 164 fstrans_done(dvp->v_mount); 165 VN_KNOTE(dvp, NOTE_WRITE); 166 return (0); 167} 168 169/* 170 * Mknod vnode call 171 */ 172/* ARGSUSED */ 173int 174ufs_mknod(void *v) 175{ 176 struct vop_mknod_args /* { 177 struct vnode *a_dvp; 178 struct vnode **a_vpp; 179 struct componentname *a_cnp; 180 struct vattr *a_vap; 181 } */ *ap = v; 182 struct vattr *vap; 183 struct vnode **vpp; 184 struct inode *ip; 185 int error; 186 struct mount *mp; 187 ino_t ino; 188 struct ufs_lookup_results *ulr; 189 190 vap = ap->a_vap; 191 vpp = ap->a_vpp; 192 193 /* XXX should handle this material another way */ 194 ulr = &VTOI(ap->a_dvp)->i_crap; 195 UFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp)); 196 197 /* 198 * UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful 199 * ufs_makeinode 200 */ 201 fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED); 202 if ((error = 203 ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 204 ap->a_dvp, ulr, vpp, ap->a_cnp)) != 0) 205 goto out; 206 VN_KNOTE(ap->a_dvp, NOTE_WRITE); 207 ip = VTOI(*vpp); 208 mp = (*vpp)->v_mount; 209 ino = ip->i_number; 210 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 211 if (vap->va_rdev != VNOVAL) 
{ 212 struct ufsmount *ump = ip->i_ump; 213 /* 214 * Want to be able to use this to make badblock 215 * inodes, so don't truncate the dev number. 216 */ 217 if (ump->um_fstype == UFS1) 218 ip->i_ffs1_rdev = ufs_rw32(vap->va_rdev, 219 UFS_MPNEEDSWAP(ump)); 220 else 221 ip->i_ffs2_rdev = ufs_rw64(vap->va_rdev, 222 UFS_MPNEEDSWAP(ump)); 223 } 224 UFS_WAPBL_UPDATE(*vpp, NULL, NULL, 0); 225 UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp); 226 /* 227 * Remove inode so that it will be reloaded by VFS_VGET and 228 * checked to see if it is an alias of an existing entry in 229 * the inode cache. 230 */ 231 (*vpp)->v_type = VNON; 232 VOP_UNLOCK(*vpp); 233 vgone(*vpp); 234 error = VFS_VGET(mp, ino, vpp); 235out: 236 fstrans_done(ap->a_dvp->v_mount); 237 if (error != 0) { 238 *vpp = NULL; 239 return (error); 240 } 241 return (0); 242} 243 244/* 245 * Open called. 246 * 247 * Nothing to do. 248 */ 249/* ARGSUSED */ 250int 251ufs_open(void *v) 252{ 253 struct vop_open_args /* { 254 struct vnode *a_vp; 255 int a_mode; 256 kauth_cred_t a_cred; 257 } */ *ap = v; 258 259 /* 260 * Files marked append-only must be opened for appending. 261 */ 262 if ((VTOI(ap->a_vp)->i_flags & APPEND) && 263 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 264 return (EPERM); 265 return (0); 266} 267 268/* 269 * Close called. 270 * 271 * Update the times on the inode. 
272 */ 273/* ARGSUSED */ 274int 275ufs_close(void *v) 276{ 277 struct vop_close_args /* { 278 struct vnode *a_vp; 279 int a_fflag; 280 kauth_cred_t a_cred; 281 } */ *ap = v; 282 struct vnode *vp; 283 struct inode *ip; 284 285 vp = ap->a_vp; 286 ip = VTOI(vp); 287 fstrans_start(vp->v_mount, FSTRANS_SHARED); 288 if (vp->v_usecount > 1) 289 UFS_ITIMES(vp, NULL, NULL, NULL); 290 fstrans_done(vp->v_mount); 291 return (0); 292} 293 294static int 295ufs_check_possible(struct vnode *vp, struct inode *ip, mode_t mode, 296 kauth_cred_t cred) 297{ 298#if defined(QUOTA) || defined(QUOTA2) 299 int error; 300#endif 301 302 /* 303 * Disallow write attempts on read-only file systems; 304 * unless the file is a socket, fifo, or a block or 305 * character device resident on the file system. 306 */ 307 if (mode & VWRITE) { 308 switch (vp->v_type) { 309 case VDIR: 310 case VLNK: 311 case VREG: 312 if (vp->v_mount->mnt_flag & MNT_RDONLY) 313 return (EROFS); 314#if defined(QUOTA) || defined(QUOTA2) 315 fstrans_start(vp->v_mount, FSTRANS_SHARED); 316 error = chkdq(ip, 0, cred, 0); 317 fstrans_done(vp->v_mount); 318 if (error != 0) 319 return error; 320#endif 321 break; 322 case VBAD: 323 case VBLK: 324 case VCHR: 325 case VSOCK: 326 case VFIFO: 327 case VNON: 328 default: 329 break; 330 } 331 } 332 333 /* If it is a snapshot, nobody gets access to it. */ 334 if ((ip->i_flags & SF_SNAPSHOT)) 335 return (EPERM); 336 /* If immutable bit set, nobody gets to write it. 
*/ 337 if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE)) 338 return (EPERM); 339 340 return 0; 341} 342 343static int 344ufs_check_permitted(struct vnode *vp, struct inode *ip, mode_t mode, 345 kauth_cred_t cred) 346{ 347 348 return genfs_can_access(vp->v_type, ip->i_mode & ALLPERMS, ip->i_uid, 349 ip->i_gid, mode, cred); 350} 351 352int 353ufs_access(void *v) 354{ 355 struct vop_access_args /* { 356 struct vnode *a_vp; 357 int a_mode; 358 kauth_cred_t a_cred; 359 } */ *ap = v; 360 struct vnode *vp; 361 struct inode *ip; 362 mode_t mode; 363 int error; 364 365 vp = ap->a_vp; 366 ip = VTOI(vp); 367 mode = ap->a_mode; 368 369 error = ufs_check_possible(vp, ip, mode, ap->a_cred); 370 if (error) 371 return error; 372 373 error = ufs_check_permitted(vp, ip, mode, ap->a_cred); 374 375 return error; 376} 377 378/* ARGSUSED */ 379int 380ufs_getattr(void *v) 381{ 382 struct vop_getattr_args /* { 383 struct vnode *a_vp; 384 struct vattr *a_vap; 385 kauth_cred_t a_cred; 386 } */ *ap = v; 387 struct vnode *vp; 388 struct inode *ip; 389 struct vattr *vap; 390 391 vp = ap->a_vp; 392 ip = VTOI(vp); 393 vap = ap->a_vap; 394 fstrans_start(vp->v_mount, FSTRANS_SHARED); 395 UFS_ITIMES(vp, NULL, NULL, NULL); 396 397 /* 398 * Copy from inode table 399 */ 400 vap->va_fsid = ip->i_dev; 401 vap->va_fileid = ip->i_number; 402 vap->va_mode = ip->i_mode & ALLPERMS; 403 vap->va_nlink = ip->i_nlink; 404 vap->va_uid = ip->i_uid; 405 vap->va_gid = ip->i_gid; 406 vap->va_size = vp->v_size; 407 if (ip->i_ump->um_fstype == UFS1) { 408 vap->va_rdev = (dev_t)ufs_rw32(ip->i_ffs1_rdev, 409 UFS_MPNEEDSWAP(ip->i_ump)); 410 vap->va_atime.tv_sec = ip->i_ffs1_atime; 411 vap->va_atime.tv_nsec = ip->i_ffs1_atimensec; 412 vap->va_mtime.tv_sec = ip->i_ffs1_mtime; 413 vap->va_mtime.tv_nsec = ip->i_ffs1_mtimensec; 414 vap->va_ctime.tv_sec = ip->i_ffs1_ctime; 415 vap->va_ctime.tv_nsec = ip->i_ffs1_ctimensec; 416 vap->va_birthtime.tv_sec = 0; 417 vap->va_birthtime.tv_nsec = 0; 418 vap->va_bytes = 
dbtob((u_quad_t)ip->i_ffs1_blocks); 419 } else { 420 vap->va_rdev = (dev_t)ufs_rw64(ip->i_ffs2_rdev, 421 UFS_MPNEEDSWAP(ip->i_ump)); 422 vap->va_atime.tv_sec = ip->i_ffs2_atime; 423 vap->va_atime.tv_nsec = ip->i_ffs2_atimensec; 424 vap->va_mtime.tv_sec = ip->i_ffs2_mtime; 425 vap->va_mtime.tv_nsec = ip->i_ffs2_mtimensec; 426 vap->va_ctime.tv_sec = ip->i_ffs2_ctime; 427 vap->va_ctime.tv_nsec = ip->i_ffs2_ctimensec; 428 vap->va_birthtime.tv_sec = ip->i_ffs2_birthtime; 429 vap->va_birthtime.tv_nsec = ip->i_ffs2_birthnsec; 430 vap->va_bytes = dbtob(ip->i_ffs2_blocks); 431 } 432 vap->va_gen = ip->i_gen; 433 vap->va_flags = ip->i_flags; 434 435 /* this doesn't belong here */ 436 if (vp->v_type == VBLK) 437 vap->va_blocksize = BLKDEV_IOSIZE; 438 else if (vp->v_type == VCHR) 439 vap->va_blocksize = MAXBSIZE; 440 else 441 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 442 vap->va_type = vp->v_type; 443 vap->va_filerev = ip->i_modrev; 444 fstrans_done(vp->v_mount); 445 return (0); 446} 447 448/* 449 * Set attribute vnode op. called from several syscalls 450 */ 451int 452ufs_setattr(void *v) 453{ 454 struct vop_setattr_args /* { 455 struct vnode *a_vp; 456 struct vattr *a_vap; 457 kauth_cred_t a_cred; 458 } */ *ap = v; 459 struct vattr *vap; 460 struct vnode *vp; 461 struct inode *ip; 462 kauth_cred_t cred; 463 struct lwp *l; 464 int error; 465 466 vap = ap->a_vap; 467 vp = ap->a_vp; 468 ip = VTOI(vp); 469 cred = ap->a_cred; 470 l = curlwp; 471 472 /* 473 * Check for unsettable attributes. 
474 */ 475 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 476 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 477 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || 478 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 479 return (EINVAL); 480 } 481 482 fstrans_start(vp->v_mount, FSTRANS_SHARED); 483 484 if (vap->va_flags != VNOVAL) { 485 if (vp->v_mount->mnt_flag & MNT_RDONLY) { 486 error = EROFS; 487 goto out; 488 } 489 if (kauth_cred_geteuid(cred) != ip->i_uid && 490 (error = kauth_authorize_generic(cred, 491 KAUTH_GENERIC_ISSUSER, NULL))) 492 goto out; 493 if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, 494 NULL) == 0) { 495 if ((ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) && 496 kauth_authorize_system(l->l_cred, 497 KAUTH_SYSTEM_CHSYSFLAGS, 0, NULL, NULL, NULL)) { 498 error = EPERM; 499 goto out; 500 } 501 /* Snapshot flag cannot be set or cleared */ 502 if ((vap->va_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) != 503 (ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL))) { 504 error = EPERM; 505 goto out; 506 } 507 error = UFS_WAPBL_BEGIN(vp->v_mount); 508 if (error) 509 goto out; 510 ip->i_flags = vap->va_flags; 511 DIP_ASSIGN(ip, flags, ip->i_flags); 512 } else { 513 if ((ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) || 514 (vap->va_flags & UF_SETTABLE) != vap->va_flags) { 515 error = EPERM; 516 goto out; 517 } 518 if ((ip->i_flags & SF_SETTABLE) != 519 (vap->va_flags & SF_SETTABLE)) { 520 error = EPERM; 521 goto out; 522 } 523 error = UFS_WAPBL_BEGIN(vp->v_mount); 524 if (error) 525 goto out; 526 ip->i_flags &= SF_SETTABLE; 527 ip->i_flags |= (vap->va_flags & UF_SETTABLE); 528 DIP_ASSIGN(ip, flags, ip->i_flags); 529 } 530 ip->i_flag |= IN_CHANGE; 531 UFS_WAPBL_UPDATE(vp, NULL, NULL, 0); 532 UFS_WAPBL_END(vp->v_mount); 533 if (vap->va_flags & (IMMUTABLE | APPEND)) { 534 error = 0; 535 goto out; 536 } 537 } 538 if (ip->i_flags & (IMMUTABLE | APPEND)) { 539 error = EPERM; 540 goto out; 541 } 542 /* 543 * Go through the fields and 
update iff not VNOVAL. 544 */ 545 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 546 if (vp->v_mount->mnt_flag & MNT_RDONLY) { 547 error = EROFS; 548 goto out; 549 } 550 error = UFS_WAPBL_BEGIN(vp->v_mount); 551 if (error) 552 goto out; 553 error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, l); 554 UFS_WAPBL_END(vp->v_mount); 555 if (error) 556 goto out; 557 } 558 if (vap->va_size != VNOVAL) { 559 /* 560 * Disallow write attempts on read-only file systems; 561 * unless the file is a socket, fifo, or a block or 562 * character device resident on the file system. 563 */ 564 switch (vp->v_type) { 565 case VDIR: 566 error = EISDIR; 567 goto out; 568 case VCHR: 569 case VBLK: 570 case VFIFO: 571 break; 572 case VREG: 573 if (vp->v_mount->mnt_flag & MNT_RDONLY) { 574 error = EROFS; 575 goto out; 576 } 577 if ((ip->i_flags & SF_SNAPSHOT) != 0) { 578 error = EPERM; 579 goto out; 580 } 581 error = UFS_WAPBL_BEGIN(vp->v_mount); 582 if (error) 583 goto out; 584 /* 585 * When journaling, only truncate one indirect block 586 * at a time. 587 */ 588 if (vp->v_mount->mnt_wapbl) { 589 uint64_t incr = MNINDIR(ip->i_ump) << 590 vp->v_mount->mnt_fs_bshift; /* Power of 2 */ 591 uint64_t base = NDADDR << 592 vp->v_mount->mnt_fs_bshift; 593 while (!error && ip->i_size > base + incr && 594 ip->i_size > vap->va_size + incr) { 595 /* 596 * round down to next full indirect 597 * block boundary. 
598 */ 599 uint64_t nsize = base + 600 ((ip->i_size - base - 1) & 601 ~(incr - 1)); 602 error = UFS_TRUNCATE(vp, nsize, 0, 603 cred); 604 if (error == 0) { 605 UFS_WAPBL_END(vp->v_mount); 606 error = 607 UFS_WAPBL_BEGIN(vp->v_mount); 608 } 609 } 610 } 611 if (!error) 612 error = UFS_TRUNCATE(vp, vap->va_size, 0, cred); 613 UFS_WAPBL_END(vp->v_mount); 614 if (error) 615 goto out; 616 break; 617 default: 618 error = EOPNOTSUPP; 619 goto out; 620 } 621 } 622 ip = VTOI(vp); 623 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || 624 vap->va_birthtime.tv_sec != VNOVAL) { 625 if (vp->v_mount->mnt_flag & MNT_RDONLY) { 626 error = EROFS; 627 goto out; 628 } 629 if ((ip->i_flags & SF_SNAPSHOT) != 0) { 630 error = EPERM; 631 goto out; 632 } 633 error = genfs_can_chtimes(vp, vap->va_vaflags, ip->i_uid, cred); 634 if (error) 635 goto out; 636 error = UFS_WAPBL_BEGIN(vp->v_mount); 637 if (error) 638 goto out; 639 if (vap->va_atime.tv_sec != VNOVAL) 640 if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) 641 ip->i_flag |= IN_ACCESS; 642 if (vap->va_mtime.tv_sec != VNOVAL) { 643 ip->i_flag |= IN_CHANGE | IN_UPDATE; 644 if (vp->v_mount->mnt_flag & MNT_RELATIME) 645 ip->i_flag |= IN_ACCESS; 646 } 647 if (vap->va_birthtime.tv_sec != VNOVAL && 648 ip->i_ump->um_fstype == UFS2) { 649 ip->i_ffs2_birthtime = vap->va_birthtime.tv_sec; 650 ip->i_ffs2_birthnsec = vap->va_birthtime.tv_nsec; 651 } 652 error = UFS_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 0); 653 UFS_WAPBL_END(vp->v_mount); 654 if (error) 655 goto out; 656 } 657 error = 0; 658 if (vap->va_mode != (mode_t)VNOVAL) { 659 if (vp->v_mount->mnt_flag & MNT_RDONLY) { 660 error = EROFS; 661 goto out; 662 } 663 if ((ip->i_flags & SF_SNAPSHOT) != 0 && 664 (vap->va_mode & (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | 665 S_IXOTH | S_IWOTH))) { 666 error = EPERM; 667 goto out; 668 } 669 error = UFS_WAPBL_BEGIN(vp->v_mount); 670 if (error) 671 goto out; 672 error = ufs_chmod(vp, (int)vap->va_mode, cred, l); 673 
UFS_WAPBL_END(vp->v_mount); 674 } 675 VN_KNOTE(vp, NOTE_ATTRIB); 676out: 677 fstrans_done(vp->v_mount); 678 return (error); 679} 680 681/* 682 * Change the mode on a file. 683 * Inode must be locked before calling. 684 */ 685static int 686ufs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct lwp *l) 687{ 688 struct inode *ip; 689 int error; 690 691 UFS_WAPBL_JLOCK_ASSERT(vp->v_mount); 692 693 ip = VTOI(vp); 694 695 error = genfs_can_chmod(vp, cred, ip->i_uid, ip->i_gid, mode); 696 if (error) 697 return (error); 698 699 fstrans_start(vp->v_mount, FSTRANS_SHARED); 700 ip->i_mode &= ~ALLPERMS; 701 ip->i_mode |= (mode & ALLPERMS); 702 ip->i_flag |= IN_CHANGE; 703 DIP_ASSIGN(ip, mode, ip->i_mode); 704 UFS_WAPBL_UPDATE(vp, NULL, NULL, 0); 705 fstrans_done(vp->v_mount); 706 return (0); 707} 708 709/* 710 * Perform chown operation on inode ip; 711 * inode must be locked prior to call. 712 */ 713static int 714ufs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, 715 struct lwp *l) 716{ 717 struct inode *ip; 718 int error = 0; 719#if defined(QUOTA) || defined(QUOTA2) 720 uid_t ouid; 721 gid_t ogid; 722 int64_t change; 723#endif 724 ip = VTOI(vp); 725 error = 0; 726 727 if (uid == (uid_t)VNOVAL) 728 uid = ip->i_uid; 729 if (gid == (gid_t)VNOVAL) 730 gid = ip->i_gid; 731 732 error = genfs_can_chown(vp, cred, ip->i_uid, ip->i_gid, uid, gid); 733 if (error) 734 return (error); 735 736 fstrans_start(vp->v_mount, FSTRANS_SHARED); 737#if defined(QUOTA) || defined(QUOTA2) 738 ogid = ip->i_gid; 739 ouid = ip->i_uid; 740 change = DIP(ip, blocks); 741 (void) chkdq(ip, -change, cred, 0); 742 (void) chkiq(ip, -1, cred, 0); 743#endif 744 ip->i_gid = gid; 745 DIP_ASSIGN(ip, gid, gid); 746 ip->i_uid = uid; 747 DIP_ASSIGN(ip, uid, uid); 748#if defined(QUOTA) || defined(QUOTA2) 749 if ((error = chkdq(ip, change, cred, 0)) == 0) { 750 if ((error = chkiq(ip, 1, cred, 0)) == 0) 751 goto good; 752 else 753 (void) chkdq(ip, -change, cred, FORCE); 754 } 755 ip->i_gid = 
ogid; 756 DIP_ASSIGN(ip, gid, ogid); 757 ip->i_uid = ouid; 758 DIP_ASSIGN(ip, uid, ouid); 759 (void) chkdq(ip, change, cred, FORCE); 760 (void) chkiq(ip, 1, cred, FORCE); 761 fstrans_done(vp->v_mount); 762 return (error); 763 good: 764#endif /* QUOTA || QUOTA2 */ 765 ip->i_flag |= IN_CHANGE; 766 UFS_WAPBL_UPDATE(vp, NULL, NULL, 0); 767 fstrans_done(vp->v_mount); 768 return (0); 769} 770 771int 772ufs_remove(void *v) 773{ 774 struct vop_remove_args /* { 775 struct vnode *a_dvp; 776 struct vnode *a_vp; 777 struct componentname *a_cnp; 778 } */ *ap = v; 779 struct vnode *vp, *dvp; 780 struct inode *ip; 781 int error; 782 struct ufs_lookup_results *ulr; 783 784 vp = ap->a_vp; 785 dvp = ap->a_dvp; 786 ip = VTOI(vp); 787 788 /* XXX should handle this material another way */ 789 ulr = &VTOI(dvp)->i_crap; 790 UFS_CHECK_CRAPCOUNTER(VTOI(dvp)); 791 792 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 793 if (vp->v_type == VDIR || (ip->i_flags & (IMMUTABLE | APPEND)) || 794 (VTOI(dvp)->i_flags & APPEND)) 795 error = EPERM; 796 else { 797 error = UFS_WAPBL_BEGIN(dvp->v_mount); 798 if (error == 0) { 799 error = ufs_dirremove(dvp, ulr, 800 ip, ap->a_cnp->cn_flags, 0); 801 UFS_WAPBL_END(dvp->v_mount); 802 } 803 } 804 VN_KNOTE(vp, NOTE_DELETE); 805 VN_KNOTE(dvp, NOTE_WRITE); 806 if (dvp == vp) 807 vrele(vp); 808 else 809 vput(vp); 810 vput(dvp); 811 fstrans_done(dvp->v_mount); 812 return (error); 813} 814 815/* 816 * ufs_link: create hard link. 
817 */ 818int 819ufs_link(void *v) 820{ 821 struct vop_link_args /* { 822 struct vnode *a_dvp; 823 struct vnode *a_vp; 824 struct componentname *a_cnp; 825 } */ *ap = v; 826 struct vnode *dvp = ap->a_dvp; 827 struct vnode *vp = ap->a_vp; 828 struct componentname *cnp = ap->a_cnp; 829 struct inode *ip; 830 struct direct *newdir; 831 int error; 832 struct ufs_lookup_results *ulr; 833 834 KASSERT(dvp != vp); 835 KASSERT(vp->v_type != VDIR); 836 KASSERT(dvp->v_mount == vp->v_mount); 837 838 /* XXX should handle this material another way */ 839 ulr = &VTOI(dvp)->i_crap; 840 UFS_CHECK_CRAPCOUNTER(VTOI(dvp)); 841 842 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 843 error = vn_lock(vp, LK_EXCLUSIVE); 844 if (error) { 845 VOP_ABORTOP(dvp, cnp); 846 goto out2; 847 } 848 ip = VTOI(vp); 849 if ((nlink_t)ip->i_nlink >= LINK_MAX) { 850 VOP_ABORTOP(dvp, cnp); 851 error = EMLINK; 852 goto out1; 853 } 854 if (ip->i_flags & (IMMUTABLE | APPEND)) { 855 VOP_ABORTOP(dvp, cnp); 856 error = EPERM; 857 goto out1; 858 } 859 error = UFS_WAPBL_BEGIN(vp->v_mount); 860 if (error) { 861 VOP_ABORTOP(dvp, cnp); 862 goto out1; 863 } 864 ip->i_nlink++; 865 DIP_ASSIGN(ip, nlink, ip->i_nlink); 866 ip->i_flag |= IN_CHANGE; 867 error = UFS_UPDATE(vp, NULL, NULL, UPDATE_DIROP); 868 if (!error) { 869 newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK); 870 ufs_makedirentry(ip, cnp, newdir); 871 error = ufs_direnter(dvp, ulr, vp, newdir, cnp, NULL); 872 pool_cache_put(ufs_direct_cache, newdir); 873 } 874 if (error) { 875 ip->i_nlink--; 876 DIP_ASSIGN(ip, nlink, ip->i_nlink); 877 ip->i_flag |= IN_CHANGE; 878 UFS_WAPBL_UPDATE(vp, NULL, NULL, UPDATE_DIROP); 879 } 880 UFS_WAPBL_END(vp->v_mount); 881 out1: 882 VOP_UNLOCK(vp); 883 out2: 884 VN_KNOTE(vp, NOTE_LINK); 885 VN_KNOTE(dvp, NOTE_WRITE); 886 vput(dvp); 887 fstrans_done(dvp->v_mount); 888 return (error); 889} 890 891/* 892 * whiteout vnode call 893 */ 894int 895ufs_whiteout(void *v) 896{ 897 struct vop_whiteout_args /* { 898 struct vnode *a_dvp; 899 
struct componentname *a_cnp; 900 int a_flags; 901 } */ *ap = v; 902 struct vnode *dvp = ap->a_dvp; 903 struct componentname *cnp = ap->a_cnp; 904 struct direct *newdir; 905 int error; 906 struct ufsmount *ump = VFSTOUFS(dvp->v_mount); 907 struct ufs_lookup_results *ulr; 908 909 /* XXX should handle this material another way */ 910 ulr = &VTOI(dvp)->i_crap; 911 UFS_CHECK_CRAPCOUNTER(VTOI(dvp)); 912 913 error = 0; 914 switch (ap->a_flags) { 915 case LOOKUP: 916 /* 4.4 format directories support whiteout operations */ 917 if (ump->um_maxsymlinklen > 0) 918 return (0); 919 return (EOPNOTSUPP); 920 921 case CREATE: 922 /* create a new directory whiteout */ 923 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 924 error = UFS_WAPBL_BEGIN(dvp->v_mount); 925 if (error) 926 break; 927#ifdef DIAGNOSTIC 928 if (ump->um_maxsymlinklen <= 0) 929 panic("ufs_whiteout: old format filesystem"); 930#endif 931 932 newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK); 933 newdir->d_ino = WINO; 934 newdir->d_namlen = cnp->cn_namelen; 935 memcpy(newdir->d_name, cnp->cn_nameptr, 936 (size_t)cnp->cn_namelen); 937 newdir->d_name[cnp->cn_namelen] = '\0'; 938 newdir->d_type = DT_WHT; 939 error = ufs_direnter(dvp, ulr, NULL, newdir, cnp, NULL); 940 pool_cache_put(ufs_direct_cache, newdir); 941 break; 942 943 case DELETE: 944 /* remove an existing directory whiteout */ 945 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 946 error = UFS_WAPBL_BEGIN(dvp->v_mount); 947 if (error) 948 break; 949#ifdef DIAGNOSTIC 950 if (ump->um_maxsymlinklen <= 0) 951 panic("ufs_whiteout: old format filesystem"); 952#endif 953 954 cnp->cn_flags &= ~DOWHITEOUT; 955 error = ufs_dirremove(dvp, ulr, NULL, cnp->cn_flags, 0); 956 break; 957 default: 958 panic("ufs_whiteout: unknown op"); 959 /* NOTREACHED */ 960 } 961 UFS_WAPBL_END(dvp->v_mount); 962 fstrans_done(dvp->v_mount); 963 return (error); 964} 965 966 967/* 968 * Rename vnode operation 969 * rename("foo", "bar"); 970 * is essentially 971 * unlink("bar"); 972 * 
 *	link("foo", "bar");
 *	unlink("foo");
 * but ``atomically''.  Can't do full commit without saving state in the
 * inode on disk which isn't feasible at this time.  Best we can do is
 * always guarantee the target exists.
 *
 * Basic algorithm is:
 *
 * 1) Bump link count on source while we're linking it to the
 *    target.  This also ensures the inode won't be deleted out
 *    from underneath us while we work (it may be truncated by
 *    a concurrent `trunc' or `open' for creation).
 * 2) Link source to destination.  If destination already exists,
 *    delete it first.
 * 3) Unlink source reference to inode if still around.  If a
 *    directory was moved and the parent of the destination
 *    is different from the source, patch the ".." entry in the
 *    directory.
 */

/*
 * Notes on rename locking:
 *
 * We lock parent vnodes before child vnodes. This means in particular
 * that if A is above B in the directory tree then A must be locked
 * before B. (This is true regardless of how many steps appear in
 * between, because an arbitrary number of other processes could lock
 * parent/child in between and establish a lock cycle and deadlock.)
 *
 * Therefore, if tdvp is above fdvp we must lock tdvp first; if fdvp
 * is above tdvp we must lock fdvp first; and if they're
 * incommensurate it doesn't matter. (But, we rely on the fact that
 * there's a whole-volume rename lock to prevent deadlock among groups
 * of renames upon overlapping sets of incommensurate vnodes.)
 *
 * In addition to establishing lock ordering the parent check also
 * serves to rule out cases where someone tries to move a directory
 * underneath itself, e.g. rename("a/b", "a/b/c"). If allowed to
 * proceed such renames would detach portions of the directory tree
 * and make fsck very unhappy.
 *
 * Note that it is an error for *fvp* to be above tdvp; however,
 * *fdvp* can be above tdvp, as in rename("a/b", "a/c/d").
 *
 * The parent check searches up the tree from tdvp until it either
 * finds fdvp or the root of the volume. It also returns the vnode it
 * saw immediately before fdvp, if any. Later on (after looking up
 * fvp) we will check to see if this *is* fvp and if so fail.
 *
 * If the parent check finds fdvp, it means fdvp is above tdvp, so we
 * lock fdvp first and then tdvp. Otherwise, either tdvp is above fdvp
 * or they're incommensurate and we lock tdvp first.
 *
 * In either case each of the child vnodes has to be looked up and
 * locked immediately after its parent. The cases
 *
 *       fdvp/fvp/[.../]tdvp/tvp
 *       tdvp/tvp/[.../]fdvp/fvp
 *
 * can cause deadlock otherwise. Note that both of these are error
 * cases; the first fails the parent check and the second fails
 * because tvp isn't empty. The parent check case is handled before
 * we start locking; however, the nonempty case requires locking tvp
 * to find out safely that it's nonempty.
 *
 * Therefore the procedure is either
 *
 *   lock fdvp
 *   lookup fvp
 *   lock fvp
 *   lock tdvp
 *   lookup tvp
 *   lock tvp
 *
 * or
 *
 *   lock tdvp
 *   lookup tvp
 *   lock tvp
 *   lock fdvp
 *   lookup fvp
 *   lock fvp
 *
 * This could in principle be simplified by always looking up fvp
 * last; because of the parent check we know by the time we start
 * locking that fvp cannot be directly above tdvp, so (given the
 * whole-volume rename lock and other assumptions) it's safe to lock
 * tdvp before fvp. This would allow the following scheme:
 *
 *   lock fdvp
 *   lock tdvp
 * or
 *   lock tdvp
 *   lock fdvp
 *
 * then
 *   lookup tvp
 *   lock tvp
 *   lookup fvp
 *   check if fvp is above tdvp, fail if so
 *   lock fvp
 *
 * which is much, much simpler.
 *
 * However, current levels of vfs namei/lookup sanity do not permit
 * this. It is impossible currently to look up fvp without locking it.
 * (It gets locked regardless of whether LOCKLEAF is set; without
 * LOCKLEAF it just gets unlocked again, which doesn't help.)
 *
 * Therefore, because we must look up fvp to know if it's above tdvp,
 * which locks fvp, we must, at least in the case where fdvp is above
 * tdvp, do that before locking tdvp. The longer scheme does that; the
 * simpler scheme is not safe.
 *
 * Note that for now we aren't doing lookup() but relookup(); however,
 * the differences are minor.
 *
 * On top of all the above, just to make everything more
 * exciting, any two of the vnodes might end up being the same.
 *
 * FROMPARENT == FROMCHILD	mv a/. foo	is an error.
 * FROMPARENT == TOPARENT	mv a/b a/c	is ok.
 * FROMPARENT == TOCHILD	mv a/b/c a/b	will give ENOTEMPTY.
 * FROMCHILD == TOPARENT	mv a/b a/b/c	fails the parent check.
 * FROMCHILD == TOCHILD		mv a/b a/b	is ok.
 * TOPARENT == TOCHILD		mv foo a/.	is an error.
 *
 * This introduces more cases in the locking, because each distinct
 * vnode must be locked exactly once.
 *
 * When FROMPARENT == TOPARENT and FROMCHILD != TOCHILD we assume it
 * doesn't matter what order the children are locked in, because the
 * per-volume rename lock excludes other renames and no other
 * operation locks two files in the same directory at once. (Note: if
 * it turns out that link() does, link() is wrong.)
1107 * 1108 * Until such time as we can do lookups without the namei and lookup 1109 * machinery "helpfully" locking the result vnode for us, we can't 1110 * avoid tripping on cases where FROMCHILD == TOCHILD. Currently for 1111 * non-directories we unlock the first one we lock while looking up 1112 * the second, then relock it if necessary. This is more or less 1113 * harmless since not much of interest can happen to the objects in 1114 * that window while we have the containing directory locked; but it's 1115 * not desirable and should be cleaned up when that becomes possible. 1116 * The right way to do it is to check after looking the second one up 1117 * and only lock it if it's different. (Note: for directories we don't 1118 * do this dance because the same directory can't appear more than 1119 * once.) 1120 */ 1121 1122/* XXX following lifted from ufs_lookup.c */ 1123#define FSFMT(vp) (((vp)->v_mount->mnt_iflag & IMNT_DTYPE) == 0) 1124 1125/* 1126 * Check if either entry referred to by FROM_ULR is within the range 1127 * of entries named by TO_ULR. 1128 */ 1129static int 1130ulr_overlap(const struct ufs_lookup_results *from_ulr, 1131 const struct ufs_lookup_results *to_ulr) 1132{ 1133 doff_t from_start, from_prevstart; 1134 doff_t to_start, to_end; 1135 1136 /* 1137 * FROM is a DELETE result; offset points to the entry to 1138 * remove and subtracting count gives the previous entry. 1139 */ 1140 from_start = from_ulr->ulr_offset - from_ulr->ulr_count; 1141 from_prevstart = from_ulr->ulr_offset; 1142 1143 /* 1144 * TO is a RENAME (thus non-DELETE) result; offset points 1145 * to the beginning of a region to write in, and adding 1146 * count gives the end of the region. 
1147 */ 1148 to_start = to_ulr->ulr_offset; 1149 to_end = to_ulr->ulr_offset + to_ulr->ulr_count; 1150 1151 if (from_prevstart >= to_start && from_prevstart < to_end) { 1152 return 1; 1153 } 1154 if (from_start >= to_start && from_start < to_end) { 1155 return 1; 1156 } 1157 return 0; 1158} 1159 1160/* 1161 * Wrapper for relookup that also updates the supplemental results. 1162 */ 1163static int 1164do_relookup(struct vnode *dvp, struct ufs_lookup_results *ulr, 1165 struct vnode **vp, struct componentname *cnp) 1166{ 1167 int error; 1168 1169 error = relookup(dvp, vp, cnp, 0); 1170 if (error) { 1171 return error; 1172 } 1173 /* update the supplemental reasults */ 1174 *ulr = VTOI(dvp)->i_crap; 1175 UFS_CHECK_CRAPCOUNTER(VTOI(dvp)); 1176 return 0; 1177} 1178 1179/* 1180 * Lock and relookup a sequence of two directories and two children. 1181 * 1182 */ 1183static int 1184lock_vnode_sequence(struct vnode *d1, struct ufs_lookup_results *ulr1, 1185 struct vnode **v1_ret, struct componentname *cn1, 1186 int v1_missing_ok, 1187 int overlap_error, 1188 struct vnode *d2, struct ufs_lookup_results *ulr2, 1189 struct vnode **v2_ret, struct componentname *cn2, 1190 int v2_missing_ok) 1191{ 1192 struct vnode *v1, *v2; 1193 int error; 1194 1195 KASSERT(d1 != d2); 1196 1197 vn_lock(d1, LK_EXCLUSIVE | LK_RETRY); 1198 if (VTOI(d1)->i_size == 0) { 1199 /* d1 has been rmdir'd */ 1200 VOP_UNLOCK(d1); 1201 return ENOENT; 1202 } 1203 error = do_relookup(d1, ulr1, &v1, cn1); 1204 if (v1_missing_ok) { 1205 if (error == ENOENT) { 1206 /* 1207 * Note: currently if the name doesn't exist, 1208 * relookup succeeds (it intercepts the 1209 * EJUSTRETURN from VOP_LOOKUP) and sets tvp 1210 * to NULL. Therefore, we will never get 1211 * ENOENT and this branch is not needed. 1212 * However, in a saner future the EJUSTRETURN 1213 * garbage will go away, so let's DTRT. 
1214 */ 1215 v1 = NULL; 1216 error = 0; 1217 } 1218 } else { 1219 if (error == 0 && v1 == NULL) { 1220 /* This is what relookup sets if v1 disappeared. */ 1221 error = ENOENT; 1222 } 1223 } 1224 if (error) { 1225 VOP_UNLOCK(d1); 1226 return error; 1227 } 1228 if (v1 && v1 == d2) { 1229 VOP_UNLOCK(d1); 1230 VOP_UNLOCK(v1); 1231 vrele(v1); 1232 return overlap_error; 1233 } 1234 1235 /* 1236 * The right way to do this is to do lookups without locking 1237 * the results, and lock the results afterwards; then at the 1238 * end we can avoid trying to lock v2 if v2 == v1. 1239 * 1240 * However, for the reasons described in the fdvp == tdvp case 1241 * in rename below, we can't do that safely. So, in the case 1242 * where v1 is not a directory, unlock it and lock it again 1243 * afterwards. This is safe in locking order because a 1244 * non-directory can't be above anything else in the tree. If 1245 * v1 *is* a directory, that's not true, but then because d1 1246 * != d2, v1 != v2. 1247 */ 1248 if (v1 && v1->v_type != VDIR) { 1249 VOP_UNLOCK(v1); 1250 } 1251 vn_lock(d2, LK_EXCLUSIVE | LK_RETRY); 1252 if (VTOI(d2)->i_size == 0) { 1253 /* d2 has been rmdir'd */ 1254 VOP_UNLOCK(d2); 1255 if (v1 && v1->v_type == VDIR) { 1256 VOP_UNLOCK(v1); 1257 } 1258 VOP_UNLOCK(d1); 1259 if (v1) { 1260 vrele(v1); 1261 } 1262 return ENOENT; 1263 } 1264 error = do_relookup(d2, ulr2, &v2, cn2); 1265 if (v2_missing_ok) { 1266 if (error == ENOENT) { 1267 /* as above */ 1268 v2 = NULL; 1269 error = 0; 1270 } 1271 } else { 1272 if (error == 0 && v2 == NULL) { 1273 /* This is what relookup sets if v2 disappeared. 
*/ 1274 error = ENOENT; 1275 } 1276 } 1277 if (error) { 1278 VOP_UNLOCK(d2); 1279 if (v1 && v1->v_type == VDIR) { 1280 VOP_UNLOCK(v1); 1281 } 1282 VOP_UNLOCK(d1); 1283 if (v1) { 1284 vrele(v1); 1285 } 1286 return error; 1287 } 1288 if (v1 && v1->v_type != VDIR && v1 != v2) { 1289 vn_lock(v1, LK_EXCLUSIVE | LK_RETRY); 1290 } 1291 *v1_ret = v1; 1292 *v2_ret = v2; 1293 return 0; 1294} 1295 1296/* 1297 * Rename vnode operation 1298 * rename("foo", "bar"); 1299 * is essentially 1300 * unlink("bar"); 1301 * link("foo", "bar"); 1302 * unlink("foo"); 1303 * but ``atomically''. Can't do full commit without saving state in the 1304 * inode on disk which isn't feasible at this time. Best we can do is 1305 * always guarantee the target exists. 1306 * 1307 * Basic algorithm is: 1308 * 1309 * 1) Bump link count on source while we're linking it to the 1310 * target. This also ensure the inode won't be deleted out 1311 * from underneath us while we work (it may be truncated by 1312 * a concurrent `trunc' or `open' for creation). 1313 * 2) Link source to destination. If destination already exists, 1314 * delete it first. 1315 * 3) Unlink source reference to inode if still around. If a 1316 * directory was moved and the parent of the destination 1317 * is different from the source, patch the ".." entry in the 1318 * directory. 
 */
int
ufs_rename(void *v)
{
	struct vop_rename_args  /* {
		struct vnode		*a_fdvp;
		struct vnode		*a_fvp;
		struct componentname	*a_fcnp;
		struct vnode		*a_tdvp;
		struct vnode		*a_tvp;
		struct componentname	*a_tcnp;
	} */ *ap = v;
	struct vnode		*tvp, *tdvp, *fvp, *fdvp;
	struct componentname	*tcnp, *fcnp;
	struct inode		*ip, *txp, *fxp, *tdp, *fdp;
	struct mount		*mp;
	struct direct		*newdir;
	int			doingdirectory, error;
	ino_t			oldparent, newparent;

	struct ufs_lookup_results from_ulr, to_ulr;

	tvp = ap->a_tvp;
	tdvp = ap->a_tdvp;
	fvp = ap->a_fvp;
	fdvp = ap->a_fdvp;
	tcnp = ap->a_tcnp;
	fcnp = ap->a_fcnp;
	doingdirectory = error = 0;
	oldparent = newparent = 0;

	/* save the supplemental lookup results as they currently exist */
	from_ulr = VTOI(fdvp)->i_crap;
	to_ulr = VTOI(tdvp)->i_crap;
	UFS_CHECK_CRAPCOUNTER(VTOI(fdvp));
	UFS_CHECK_CRAPCOUNTER(VTOI(tdvp));

	/*
	 * Owing to VFS oddities we are currently called with tdvp/tvp
	 * locked and not fdvp/fvp. In a sane world we'd be passed
	 * tdvp and fdvp only, unlocked, and two name strings. Pretend
	 * we have a sane world and unlock tdvp and tvp.
	 */
	VOP_UNLOCK(tdvp);
	if (tvp && tvp != tdvp) {
		VOP_UNLOCK(tvp);
	}

	/* Also pretend we have a sane world and vrele fvp/tvp. */
	vrele(fvp);
	fvp = NULL;
	if (tvp) {
		vrele(tvp);
		tvp = NULL;
	}

	/*
	 * From here on fvp/tvp are NULL until they are looked up again
	 * below; the "abort" exit relies on that to know what to vrele.
	 */

	/*
	 * Check for cross-device rename.
	 */
	if (fdvp->v_mount != tdvp->v_mount) {
		error = EXDEV;
		goto abort;
	}

	/*
	 * Reject "." and ".."
	 */
	if ((fcnp->cn_flags & ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) ||
	    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
	    (tcnp->cn_namelen == 1 && tcnp->cn_nameptr[0] == '.')) {
		error = EINVAL;
		goto abort;
	}

	/*
	 * Get locks.
	 */

	/* paranoia */
	fcnp->cn_flags |= LOCKPARENT|LOCKLEAF;
	tcnp->cn_flags |= LOCKPARENT|LOCKLEAF;

	if (fdvp == tdvp) {
		/* One directory. Lock it and relookup both children. */
		vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);

		if (VTOI(fdvp)->i_size == 0) {
			/* directory has been rmdir'd */
			VOP_UNLOCK(fdvp);
			error = ENOENT;
			goto abort;
		}

		error = do_relookup(fdvp, &from_ulr, &fvp, fcnp);
		if (error == 0 && fvp == NULL) {
			/* relookup may produce this if fvp disappears */
			error = ENOENT;
		}
		if (error) {
			VOP_UNLOCK(fdvp);
			goto abort;
		}

		/*
		 * The right way to do this is to look up both children
		 * without locking either, and then lock both unless they
		 * turn out to be the same. However, due to deep-seated
		 * VFS-level issues all lookups lock the child regardless
		 * of whether LOCKLEAF is set (if LOCKLEAF is not set,
		 * the child is locked during lookup and then unlocked)
		 * so it is not safe to look up tvp while fvp is locked.
		 *
		 * Unlocking fvp here temporarily is more or less safe,
		 * because with the directory locked there's not much
		 * that can happen to it. However, ideally it wouldn't
		 * be necessary. XXX.
		 */
		VOP_UNLOCK(fvp);
		/* remember fdvp == tdvp so tdvp is locked */
		error = do_relookup(tdvp, &to_ulr, &tvp, tcnp);
		if (error && error != ENOENT) {
			VOP_UNLOCK(fdvp);
			goto abort;
		}
		if (error == ENOENT) {
			/*
			 * Note: currently if the name doesn't exist,
			 * relookup succeeds (it intercepts the
			 * EJUSTRETURN from VOP_LOOKUP) and sets tvp
			 * to NULL. Therefore, we will never get
			 * ENOENT and this branch is not needed.
			 * However, in a saner future the EJUSTRETURN
			 * garbage will go away, so let's DTRT.
			 */
			tvp = NULL;
		}

		/* tvp is locked; lock fvp if necessary */
		if (!tvp || tvp != fvp) {
			vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
		}
	} else {
		int found_fdvp;
		struct vnode *illegal_fvp;

		/*
		 * The source must not be above the destination. (If
		 * it were, the rename would detach a section of the
		 * tree.)
		 *
		 * Look up the tree from tdvp to see if we find fdvp,
		 * and if so, return the immediate child of fdvp we're
		 * under; that must not turn out to be the same as
		 * fvp.
		 *
		 * The per-volume rename lock guarantees that the
		 * result of this check remains true until we finish
		 * looking up and locking.
		 */
		error = ufs_parentcheck(fdvp, tdvp, fcnp->cn_cred,
					&found_fdvp, &illegal_fvp);
		if (error) {
			goto abort;
		}

		/* Must lock in tree order. */

		if (found_fdvp) {
			/* fdvp -> fvp -> tdvp -> tvp */
			error = lock_vnode_sequence(fdvp, &from_ulr,
						    &fvp, fcnp, 0,
						    EINVAL,
						    tdvp, &to_ulr,
						    &tvp, tcnp, 1);
		} else {
			/* tdvp -> tvp -> fdvp -> fvp */
			error = lock_vnode_sequence(tdvp, &to_ulr,
						    &tvp, tcnp, 1,
						    ENOTEMPTY,
						    fdvp, &from_ulr,
						    &fvp, fcnp, 0);
		}
		if (error) {
			/* drop the reference ufs_parentcheck handed back */
			if (illegal_fvp) {
				vrele(illegal_fvp);
			}
			goto abort;
		}
		KASSERT(fvp != NULL);

		/* fvp is an ancestor of tdvp: would detach a subtree */
		if (illegal_fvp && fvp == illegal_fvp) {
			vrele(illegal_fvp);
			error = EINVAL;
			goto abort_withlocks;
		}

		if (illegal_fvp) {
			vrele(illegal_fvp);
		}
	}

	KASSERT(fdvp && VOP_ISLOCKED(fdvp));
	KASSERT(fvp && VOP_ISLOCKED(fvp));
	KASSERT(tdvp && VOP_ISLOCKED(tdvp));
	KASSERT(tvp == NULL || VOP_ISLOCKED(tvp));

	/* --- everything is now locked --- */

	if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) ||
	    (VTOI(tdvp)->i_flags & APPEND))) {
		error = EPERM;
		goto abort_withlocks;
	}

	/*
	 * Check if just deleting a link name.
	 */
	if (fvp == tvp) {
		if (fvp->v_type == VDIR) {
			error = EINVAL;
			goto abort_withlocks;
		}

		/* Release destination completely. Leave fdvp locked. */
		VOP_ABORTOP(tdvp, tcnp);
		if (fdvp != tdvp) {
			VOP_UNLOCK(tdvp);
		}
		VOP_UNLOCK(tvp);
		vrele(tdvp);
		vrele(tvp);

		/* Delete source. */
		/* XXX: do we really need to relookup again? */

		/*
		 * fdvp is still locked, but we just unlocked fvp
		 * (because fvp == tvp) so just decref fvp
		 */
		vrele(fvp);
		fcnp->cn_flags &= ~(MODMASK);
		fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
		fcnp->cn_nameiop = DELETE;
		if ((error = relookup(fdvp, &fvp, fcnp, 0))) {
			vput(fdvp);
			return (error);
		}
		/* hand the (re-looked-up) source over to the remove op */
		return (VOP_REMOVE(fdvp, fvp, fcnp));
	}
	fdp = VTOI(fdvp);
	ip = VTOI(fvp);
	if ((nlink_t) ip->i_nlink >= LINK_MAX) {
		error = EMLINK;
		goto abort_withlocks;
	}
	if ((ip->i_flags & (IMMUTABLE | APPEND)) ||
		(fdp->i_flags & APPEND)) {
		error = EPERM;
		goto abort_withlocks;
	}
	if ((ip->i_mode & IFMT) == IFDIR) {
		/*
		 * Avoid ".", "..", and aliases of "." for obvious reasons.
		 */
		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
		    fdp == ip ||
		    (fcnp->cn_flags & ISDOTDOT) ||
		    (tcnp->cn_flags & ISDOTDOT) ||
		    (ip->i_flag & IN_RENAME)) {
			error = EINVAL;
			goto abort_withlocks;
		}
		/* cleared again on every exit path below (see out2) */
		ip->i_flag |= IN_RENAME;
		doingdirectory = 1;
	}
	oldparent = fdp->i_number;
	VN_KNOTE(fdvp, NOTE_WRITE);		/* XXXLUKEM/XXX: right place? */

	/*
	 * Both the directory
	 * and target vnodes are locked.
	 */
	tdp = VTOI(tdvp);
	txp = NULL;
	if (tvp)
		txp = VTOI(tvp);

	/*
	 * Past this point exits go through out/bad/done/out2, which
	 * end with fstrans_done(mp); the abort labels must not be used.
	 */
	mp = fdvp->v_mount;
	fstrans_start(mp, FSTRANS_SHARED);

	/* newparent stays 0 when source and target share a parent */
	if (oldparent != tdp->i_number)
		newparent = tdp->i_number;

	/*
	 * If ".." must be changed (ie the directory gets a new
	 * parent) the user must have write permission in the source
	 * so as to be able to change "..".
	 */
	if (doingdirectory && newparent) {
		error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
		if (error)
			goto out;
	}

	KASSERT(fdvp != tvp);

	/*
	 * NOTE(review): the test below repeats the condition the
	 * KASSERT above excludes; presumably it is kept for kernels
	 * built without assertions -- confirm before removing.
	 */
	if (newparent) {
		/* Check for the rename("foo/foo", "foo") case. */
		if (fdvp == tvp) {
			error = doingdirectory ? ENOTEMPTY : EISDIR;
			goto out;
		}
	}

	fxp = VTOI(fvp);
	fdp = VTOI(fdvp);

	error = UFS_WAPBL_BEGIN(fdvp->v_mount);
	if (error)
		goto out2;

	/*
	 * 1) Bump link count while we're moving stuff
	 *    around.  If we crash somewhere before
	 *    completing our work, the link count
	 *    may be wrong, but correctable.
	 */
	ip->i_nlink++;
	DIP_ASSIGN(ip, nlink, ip->i_nlink);
	ip->i_flag |= IN_CHANGE;
	if ((error = UFS_UPDATE(fvp, NULL, NULL, UPDATE_DIROP)) != 0) {
		goto bad;
	}

	/*
	 * 2) If target doesn't exist, link the target
	 *    to the source and unlink the source.
	 *    Otherwise, rewrite the target directory
	 *    entry to reference the source inode and
	 *    expunge the original entry's existence.
	 */
	if (txp == NULL) {
		if (tdp->i_dev != ip->i_dev)
			panic("rename: EXDEV");
		/*
		 * Account for ".." in new directory.
		 * When source and destination have the same
		 * parent we don't fool with the link count.
		 */
		if (doingdirectory && newparent) {
			if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
				error = EMLINK;
				goto bad;
			}
			tdp->i_nlink++;
			DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
			tdp->i_flag |= IN_CHANGE;
			if ((error = UFS_UPDATE(tdvp, NULL, NULL,
			    UPDATE_DIROP)) != 0) {
				/* undo the bump before bailing out */
				tdp->i_nlink--;
				DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
				tdp->i_flag |= IN_CHANGE;
				goto bad;
			}
		}
		newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
		ufs_makedirentry(ip, tcnp, newdir);
		error = ufs_direnter(tdvp, &to_ulr,
				     NULL, newdir, tcnp, NULL);
		pool_cache_put(ufs_direct_cache, newdir);
		if (error != 0) {
			if (doingdirectory && newparent) {
				/* undo the ".." accounting done above */
				tdp->i_nlink--;
				DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
				tdp->i_flag |= IN_CHANGE;
				(void)UFS_UPDATE(tdvp, NULL, NULL,
						 UPDATE_WAIT | UPDATE_DIROP);
			}
			goto bad;
		}
		VN_KNOTE(tdvp, NOTE_WRITE);
	} else {
		if (txp->i_dev != tdp->i_dev || txp->i_dev != ip->i_dev)
			panic("rename: EXDEV");
		/*
		 * Short circuit rename(foo, foo).
		 */
		if (txp->i_number == ip->i_number)
			panic("rename: same file");
		/*
		 * If the parent directory is "sticky", then the user must
		 * own the parent directory, or the destination of the rename,
		 * otherwise the destination may not be changed (except by
		 * root). This implements append-only directories.
		 */
		if ((tdp->i_mode & S_ISTXT) &&
		    kauth_authorize_generic(tcnp->cn_cred,
		     KAUTH_GENERIC_ISSUSER, NULL) != 0 &&
		    kauth_cred_geteuid(tcnp->cn_cred) != tdp->i_uid &&
		    txp->i_uid != kauth_cred_geteuid(tcnp->cn_cred)) {
			error = EPERM;
			goto bad;
		}
		/*
		 * Target must be empty if a directory and have no links
		 * to it. Also, ensure source and target are compatible
		 * (both directories, or both not directories).
		 */
		if ((txp->i_mode & IFMT) == IFDIR) {
			if (txp->i_nlink > 2 ||
			    !ufs_dirempty(txp, tdp->i_number, tcnp->cn_cred)) {
				error = ENOTEMPTY;
				goto bad;
			}
			if (!doingdirectory) {
				error = ENOTDIR;
				goto bad;
			}
			cache_purge(tdvp);
		} else if (doingdirectory) {
			error = EISDIR;
			goto bad;
		}
		if ((error = ufs_dirrewrite(tdp, to_ulr.ulr_offset,
		    txp, ip->i_number,
		    IFTODT(ip->i_mode), doingdirectory && newparent ?
		    newparent : doingdirectory, IN_CHANGE | IN_UPDATE)) != 0)
			goto bad;
		if (doingdirectory) {
			/*
			 * Truncate inode. The only stuff left in the directory
			 * is "." and "..". The "." reference is inconsequential
			 * since we are quashing it. We have removed the "."
			 * reference and the reference in the parent directory,
			 * but there may be other hard links.
			 */
			if (!newparent) {
				tdp->i_nlink--;
				DIP_ASSIGN(tdp, nlink, tdp->i_nlink);
				tdp->i_flag |= IN_CHANGE;
				UFS_WAPBL_UPDATE(tdvp, NULL, NULL, 0);
			}
			txp->i_nlink--;
			DIP_ASSIGN(txp, nlink, txp->i_nlink);
			txp->i_flag |= IN_CHANGE;
			if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC,
			    tcnp->cn_cred)))
				goto bad;
		}
		VN_KNOTE(tdvp, NOTE_WRITE);
		VN_KNOTE(tvp, NOTE_DELETE);
	}

	/*
	 * Handle case where the directory entry we need to remove,
	 * which is/was at from_ulr.ulr_offset, or the one before it,
	 * which is/was at from_ulr.ulr_offset - from_ulr.ulr_count,
	 * may have been moved when the directory insertion above
	 * performed compaction.
	 */
	if (tdp->i_number == fdp->i_number &&
	    ulr_overlap(&from_ulr, &to_ulr)) {

		struct buf *bp;
		struct direct *ep;
		struct ufsmount *ump = fdp->i_ump;
		doff_t curpos;
		doff_t endsearch;	/* offset to end directory search */
		uint32_t prev_reclen;
		int dirblksiz = ump->um_dirblksiz;
		const int needswap = UFS_MPNEEDSWAP(ump);
		u_long bmask;
		int namlen, entryoffsetinblock;
		char *dirbuf;

		bmask = fdvp->v_mount->mnt_stat.f_iosize - 1;

		/*
		 * The fcnp entry will be somewhere between the start of
		 * compaction (to_ulr.ulr_offset) and the original location
		 * (from_ulr.ulr_offset).
		 */
		curpos = to_ulr.ulr_offset;
		endsearch = from_ulr.ulr_offset + from_ulr.ulr_reclen;
		entryoffsetinblock = 0;

		/*
		 * Get the directory block containing the start of
		 * compaction.
		 */
		error = ufs_blkatoff(fdvp, (off_t)to_ulr.ulr_offset, &dirbuf,
		    &bp, false);
		if (error)
			goto bad;

		/*
		 * Keep existing ulr_count (length of previous record)
		 * for the case where compaction did not include the
		 * previous entry but started at the from-entry.
		 */
		prev_reclen = from_ulr.ulr_count;

		/* walk the entries until the source name is found */
		while (curpos < endsearch) {
			uint32_t reclen;

			/*
			 * If necessary, get the next directory block.
			 *
			 * dholland 7/13/11 to the best of my understanding
			 * this should never happen; compaction occurs only
			 * within single blocks. I think.
			 */
			if ((curpos & bmask) == 0) {
				if (bp != NULL)
					brelse(bp, 0);
				error = ufs_blkatoff(fdvp, (off_t)curpos,
				    &dirbuf, &bp, false);
				if (error)
					goto bad;
				entryoffsetinblock = 0;
			}

			KASSERT(bp != NULL);
			ep = (struct direct *)(dirbuf + entryoffsetinblock);
			reclen = ufs_rw16(ep->d_reclen, needswap);

			/*
			 * When FSFMT() is true for this volume the name
			 * length is taken from d_type instead of d_namlen,
			 * depending on host byte order and needswap.
			 */
#if (BYTE_ORDER == LITTLE_ENDIAN)
			if (FSFMT(fdvp) && needswap == 0)
				namlen = ep->d_type;
			else
				namlen = ep->d_namlen;
#else
			if (FSFMT(fdvp) && needswap != 0)
				namlen = ep->d_type;
			else
				namlen = ep->d_namlen;
#endif
			if ((ep->d_ino != 0) &&
			    (ufs_rw32(ep->d_ino, needswap) != WINO) &&
			    (namlen == fcnp->cn_namelen) &&
			    memcmp(ep->d_name, fcnp->cn_nameptr, namlen) == 0) {
				from_ulr.ulr_reclen = reclen;
				break;
			}
			curpos += reclen;
			entryoffsetinblock += reclen;
			prev_reclen = reclen;
		}

		/* record where the source entry (and its predecessor) now is */
		from_ulr.ulr_offset = curpos;
		from_ulr.ulr_count = prev_reclen;

		KASSERT(curpos <= endsearch);

		/*
		 * If ulr_offset points to start of a directory block,
		 * clear ulr_count so ufs_dirremove() doesn't try to
		 * merge free space over a directory block boundary.
		 */
		if ((from_ulr.ulr_offset & (dirblksiz - 1)) == 0)
			from_ulr.ulr_count = 0;

		brelse(bp, 0);
	}

	/*
	 * 3) Unlink the source.
	 */

#if 0
	/*
	 * Ensure that the directory entry still exists and has not
	 * changed while the new name has been entered. If the source is
	 * a file then the entry may have been unlinked or renamed. In
	 * either case there is no further work to be done. If the source
	 * is a directory then it cannot have been rmdir'ed; The IRENAME
	 * flag ensures that it cannot be moved by another rename or removed
	 * by a rmdir.
	 */
#endif
	KASSERT(fxp == ip);

	/*
	 * If the source is a directory with a new parent, the link
	 * count of the old parent directory must be decremented and
	 * ".." set to point to the new parent.
	 */
	if (doingdirectory && newparent) {
		KASSERT(fdp != NULL);
		/* NOTE(review): the result of this rewrite is not checked */
		ufs_dirrewrite(fxp, mastertemplate.dot_reclen,
			       fdp, newparent, DT_DIR, 0, IN_CHANGE);
		cache_purge(fdvp);
	}
	error = ufs_dirremove(fdvp, &from_ulr,
			      fxp, fcnp->cn_flags, 0);
	fxp->i_flag &= ~IN_RENAME;

	VN_KNOTE(fvp, NOTE_RENAME);
	goto done;

	/*
	 * Failures after fstrans_start() but before UFS_WAPBL_BEGIN()
	 * come here; there is no journal transaction to end.
	 */
 out:
	goto out2;

	/* exit routines from steps 1 & 2 */
 bad:
	/*
	 * Undo the link count bump from step 1.  IN_RENAME is cleared
	 * unconditionally a few lines down (and again at out2), so the
	 * doingdirectory-guarded clear is redundant.
	 */
	if (doingdirectory)
		ip->i_flag &= ~IN_RENAME;
	ip->i_nlink--;
	DIP_ASSIGN(ip, nlink, ip->i_nlink);
	ip->i_flag |= IN_CHANGE;
	ip->i_flag &= ~IN_RENAME;
	UFS_WAPBL_UPDATE(fvp, NULL, NULL, 0);
 done:
	UFS_WAPBL_END(fdvp->v_mount);
 out2:
	/*
	 * clear IN_RENAME - some exit paths happen too early to go
	 * through the cleanup done in the "bad" case above, so we
	 * always do this mini-cleanup here.
	 */
	ip->i_flag &= ~IN_RENAME;

	VOP_UNLOCK(fdvp);
	if (tdvp != fdvp) {
		VOP_UNLOCK(tdvp);
	}
	VOP_UNLOCK(fvp);
	if (tvp && tvp != fvp) {
		VOP_UNLOCK(tvp);
	}

	vrele(fdvp);
	vrele(tdvp);
	vrele(fvp);
	if (tvp) {
		vrele(tvp);
	}

	fstrans_done(mp);
	return (error);

	/*
	 * Early exits taken before fstrans_start(): abort_withlocks
	 * drops the four vnode locks first, abort only the references.
	 */
 abort_withlocks:
	VOP_UNLOCK(fdvp);
	if (tdvp != fdvp) {
		VOP_UNLOCK(tdvp);
	}
	VOP_UNLOCK(fvp);
	if (tvp && tvp != fvp) {
		VOP_UNLOCK(tvp);
	}

 abort:
	VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
	VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
	vrele(tdvp);
	if (tvp) {
		vrele(tvp);
	}
	vrele(fdvp);
	if (fvp) {
		vrele(fvp);
	}
	return (error);
}

/*
 * Mkdir vnode operation.
 *
 * Allocates an inode for the new directory (UFS_VALLOC), bumps the
 * parent's link count, writes a first directory block holding "." and
 * ".." built from mastertemplate, and finally enters the new name in
 * the parent with ufs_direnter().
 *
 * dvp is vput() on every return path.  On success the new vnode is
 * left in *ap->a_vpp; on failure the new vnode, if one was allocated,
 * is vput() before returning.
 *
 * Returns 0 or an errno: EMLINK when the parent is already at
 * LINK_MAX, otherwise the error of whichever step failed.
 */
int
ufs_mkdir(void *v)
{
	struct vop_mkdir_args /* {
		struct vnode		*a_dvp;
		struct vnode		**a_vpp;
		struct componentname	*a_cnp;
		struct vattr		*a_vap;
	} */ *ap = v;
	struct vnode		*dvp = ap->a_dvp, *tvp;
	struct vattr		*vap = ap->a_vap;
	struct componentname	*cnp = ap->a_cnp;
	struct inode		*ip, *dp = VTOI(dvp);
	struct buf		*bp;
	struct dirtemplate	dirtemplate;
	struct direct		*newdir;
	int			error, dmode;
	struct ufsmount		*ump = dp->i_ump;
	int			dirblksiz = ump->um_dirblksiz;
	struct ufs_lookup_results *ulr;

	fstrans_start(dvp->v_mount, FSTRANS_SHARED);

	/* XXX should handle this material another way */
	ulr = &dp->i_crap;
	UFS_CHECK_CRAPCOUNTER(dp);

	/* the new directory's ".." will add a link to the parent */
	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
		error = EMLINK;
		goto out;
	}
	dmode = vap->va_mode & ACCESSPERMS;
	dmode |= IFDIR;
	/*
	 * Must simulate part of ufs_makeinode here to acquire the inode,
	 * but not have it entered in the parent directory. The entry is
	 * made later after writing "." and ".." entries.
	 */
	if ((error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, ap->a_vpp)) != 0)
		goto out;

	tvp = *ap->a_vpp;
	ip = VTOI(tvp);

	error = UFS_WAPBL_BEGIN(ap->a_dvp->v_mount);
	if (error) {
		UFS_VFREE(tvp, ip->i_number, dmode);
		vput(tvp);
		goto out;
	}
	/* owner is the caller's effective uid; group is the parent's */
	ip->i_uid = kauth_cred_geteuid(cnp->cn_cred);
	DIP_ASSIGN(ip, uid, ip->i_uid);
	ip->i_gid = dp->i_gid;
	DIP_ASSIGN(ip, gid, ip->i_gid);
#if defined(QUOTA) || defined(QUOTA2)
	if ((error = chkiq(ip, 1, cnp->cn_cred, 0))) {
		/* this path does its own full cleanup and returns directly */
		UFS_VFREE(tvp, ip->i_number, dmode);
		UFS_WAPBL_END(dvp->v_mount);
		fstrans_done(dvp->v_mount);
		vput(tvp);
		vput(dvp);
		return (error);
	}
#endif
	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	ip->i_mode = dmode;
	DIP_ASSIGN(ip, mode, dmode);
	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
	ip->i_nlink = 2;
	DIP_ASSIGN(ip, nlink, 2);
	if (cnp->cn_flags & ISWHITEOUT) {
		ip->i_flags |= UF_OPAQUE;
		DIP_ASSIGN(ip, flags, ip->i_flags);
	}

	/*
	 * Bump link count in parent directory to reflect work done below.
	 * Should be done before reference is created so cleanup is
	 * possible if we crash.
	 */
	dp->i_nlink++;
	DIP_ASSIGN(dp, nlink, dp->i_nlink);
	dp->i_flag |= IN_CHANGE;
	if ((error = UFS_UPDATE(dvp, NULL, NULL, UPDATE_DIROP)) != 0)
		goto bad;

	/*
	 * Initialize directory with "." and ".." from static template.
	 */
	dirtemplate = mastertemplate;
	/* ".." takes up the rest of the directory block after "." */
	dirtemplate.dotdot_reclen = dirblksiz - dirtemplate.dot_reclen;
	dirtemplate.dot_ino = ufs_rw32(ip->i_number, UFS_MPNEEDSWAP(ump));
	dirtemplate.dotdot_ino = ufs_rw32(dp->i_number, UFS_MPNEEDSWAP(ump));
	dirtemplate.dot_reclen = ufs_rw16(dirtemplate.dot_reclen,
	    UFS_MPNEEDSWAP(ump));
	dirtemplate.dotdot_reclen = ufs_rw16(dirtemplate.dotdot_reclen,
	    UFS_MPNEEDSWAP(ump));
	if (ump->um_maxsymlinklen <= 0) {
		/*
		 * Depending on byte order either move the name lengths
		 * into the type fields or just zero the type fields.
		 */
#if BYTE_ORDER == LITTLE_ENDIAN
		if (UFS_MPNEEDSWAP(ump) == 0)
#else
		if (UFS_MPNEEDSWAP(ump) != 0)
#endif
		{
			dirtemplate.dot_type = dirtemplate.dot_namlen;
			dirtemplate.dotdot_type = dirtemplate.dotdot_namlen;
			dirtemplate.dot_namlen = dirtemplate.dotdot_namlen = 0;
		} else
			dirtemplate.dot_type = dirtemplate.dotdot_type = 0;
	}
	if ((error = UFS_BALLOC(tvp, (off_t)0, dirblksiz, cnp->cn_cred,
	    B_CLRBUF, &bp)) != 0)
		goto bad;
	ip->i_size = dirblksiz;
	DIP_ASSIGN(ip, size, dirblksiz);
	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	uvm_vnp_setsize(tvp, ip->i_size);
	memcpy((void *)bp->b_data, (void *)&dirtemplate, sizeof dirtemplate);

	/*
	 * Directory set up, now install its entry in the parent directory.
	 * We must write out the buffer containing the new directory body
	 * before entering the new name in the parent.
	 */
	if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0)
		goto bad;
	if ((error = UFS_UPDATE(tvp, NULL, NULL, UPDATE_DIROP)) != 0) {
		goto bad;
	}
	newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK);
	ufs_makedirentry(ip, cnp, newdir);
	error = ufs_direnter(dvp, ulr, tvp, newdir, cnp, bp);
	pool_cache_put(ufs_direct_cache, newdir);
	/* the success path falls through into "bad" as well */
 bad:
	if (error == 0) {
		VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
		UFS_WAPBL_END(dvp->v_mount);
	} else {
		/* take back the parent link count bumped above */
		dp->i_nlink--;
		DIP_ASSIGN(dp, nlink, dp->i_nlink);
		dp->i_flag |= IN_CHANGE;
		UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
		/*
		 * No need to do an explicit UFS_TRUNCATE here, vrele will
		 * do this for us because we set the link count to 0.
		 */
		ip->i_nlink = 0;
		DIP_ASSIGN(ip, nlink, 0);
		ip->i_flag |= IN_CHANGE;
		/* If IN_ADIROP, account for it */
		UFS_UNMARK_VNODE(tvp);
		UFS_WAPBL_UPDATE(tvp, NULL, NULL, UPDATE_DIROP);
		UFS_WAPBL_END(dvp->v_mount);
		vput(tvp);
	}
 out:
	fstrans_done(dvp->v_mount);
	vput(dvp);
	return (error);
}

/*
 * Rmdir vnode operation.
 *
 * Removes the entry for vp from dvp, drops a link on both inodes and
 * truncates vp to zero length.
 *
 * Fails with EINVAL for "." (dp == ip), for a directory something is
 * mounted on, or for one flagged IN_RENAME; with ENOTEMPTY if the
 * link count is not 2 or ufs_dirempty() says it has entries; and with
 * EPERM if the APPEND/IMMUTABLE flags forbid it.  Otherwise returns
 * the result of the journal begin, ufs_dirremove() or UFS_TRUNCATE().
 *
 * Both vp and dvp are released before returning on every path.
 */
int
ufs_rmdir(void *v)
{
	struct vop_rmdir_args /* {
		struct vnode		*a_dvp;
		struct vnode		*a_vp;
		struct componentname	*a_cnp;
	} */ *ap = v;
	struct vnode		*vp, *dvp;
	struct componentname	*cnp;
	struct inode		*ip, *dp;
	int			error;
	struct ufs_lookup_results *ulr;

	vp = ap->a_vp;
	dvp = ap->a_dvp;
	cnp = ap->a_cnp;
	ip = VTOI(vp);
	dp = VTOI(dvp);

	/* XXX should handle this material another way */
	ulr = &dp->i_crap;
	UFS_CHECK_CRAPCOUNTER(dp);

	/*
	 * No rmdir "." or of mounted directories please.
	 */
	if (dp == ip || vp->v_mountedhere != NULL) {
		/* when dvp == vp the single vput(vp) below unlocks it */
		if (dp == ip)
			vrele(dvp);
		else
			vput(dvp);
		vput(vp);
		return (EINVAL);
	}

	fstrans_start(dvp->v_mount, FSTRANS_SHARED);

	/*
	 * Do not remove a directory that is in the process of being renamed.
	 * Verify that the directory is empty (and valid). (Rmdir ".." won't
	 * be valid since ".." will contain a reference to the current
	 * directory and thus be non-empty.)
	 */
	error = 0;
	if (ip->i_flag & IN_RENAME) {
		error = EINVAL;
		goto out;
	}
	if (ip->i_nlink != 2 ||
	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
		error = ENOTEMPTY;
		goto out;
	}
	if ((dp->i_flags & APPEND) ||
		(ip->i_flags & (IMMUTABLE | APPEND))) {
		error = EPERM;
		goto out;
	}
	error = UFS_WAPBL_BEGIN(dvp->v_mount);
	if (error)
		goto out;
	/*
	 * Delete reference to directory before purging
	 * inode.  If we crash in between, the directory
	 * will be reattached to lost+found.
	 */
	error = ufs_dirremove(dvp, ulr, ip, cnp->cn_flags, 1);
	if (error) {
		UFS_WAPBL_END(dvp->v_mount);
		goto out;
	}
	VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
	cache_purge(dvp);
	/*
	 * Truncate inode.  The only stuff left in the directory is "." and
	 * "..".  The "." reference is inconsequential since we're quashing
	 * it.
	 */
	dp->i_nlink--;
	DIP_ASSIGN(dp, nlink, dp->i_nlink);
	dp->i_flag |= IN_CHANGE;
	UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP);
	ip->i_nlink--;
	DIP_ASSIGN(ip, nlink, ip->i_nlink);
	ip->i_flag |= IN_CHANGE;
	/* the truncate result becomes the return value */
	error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred);
	cache_purge(vp);
	/*
	 * Unlock the log while we still have reference to unlinked
	 * directory vp so that it will not get locked for recycling
	 */
	UFS_WAPBL_END(dvp->v_mount);
#ifdef UFS_DIRHASH
	if (ip->i_dirhash != NULL)
		ufsdirhash_free(ip);
#endif
 out:
	/*
	 * NOTE(review): the error exits above jump here too, so
	 * NOTE_DELETE is posted even when nothing was removed --
	 * confirm that is intended.
	 */
	VN_KNOTE(vp, NOTE_DELETE);
	vput(vp);
	fstrans_done(dvp->v_mount);
	vput(dvp);
	return (error);
}

/*
 * symlink -- make a symbolic link
 *
 * Creates the link inode with ufs_makeinode() and stores the target
 * string: in the inode itself (SHORTLINK) when it is shorter than the
 * volume's um_maxsymlinklen, otherwise as file data via vn_rdwr().
 * If storing the target fails the new vnode is vput().
 */
int
ufs_symlink(void *v)
{
	struct vop_symlink_args /* {
		struct vnode		*a_dvp;
		struct vnode		**a_vpp;
		struct componentname	*a_cnp;
		struct vattr		*a_vap;
		char			*a_target;
	} */ *ap = v;
	struct vnode	*vp, **vpp;
	struct inode	*ip;
	int		len, error;
	struct ufs_lookup_results *ulr;

	vpp = ap->a_vpp;

	/* XXX should handle this material another way */
	ulr = &VTOI(ap->a_dvp)->i_crap;
	UFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));

	/*
	 * UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful
	 * ufs_makeinode
	 */
	fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED);
	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, ulr,
			      vpp, ap->a_cnp);
	if (error)
		goto out;
	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
	vp = *vpp;
	len = strlen(ap->a_target);
	ip = VTOI(vp);
	if (len < ip->i_ump->um_maxsymlinklen) {
		/* short target: keep it inside the inode, no data block */
		memcpy((char *)SHORTLINK(ip), ap->a_target, len);
		ip->i_size = len;
		DIP_ASSIGN(ip, size, len);
		uvm_vnp_setsize(vp, ip->i_size);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (vp->v_mount->mnt_flag & MNT_RELATIME)
			ip->i_flag |= IN_ACCESS;
		UFS_WAPBL_UPDATE(vp, NULL, NULL, 0);
	} else
		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
		    UIO_SYSSPACE, IO_NODELOCKED | IO_JOURNALLOCKED,
		    ap->a_cnp->cn_cred, NULL, NULL);
	/* ends the transaction begun inside ufs_makeinode (see above) */
	UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp);
	if (error)
		vput(vp);
out:
	fstrans_done(ap->a_dvp->v_mount);
	return (error);
}

/*
 * Vnode op for reading directories.
 *
 * This routine handles converting from the on-disk directory format
 * "struct direct" to the in-memory format "struct dirent" as well as
 * byte swapping the entries if necessary.
 */
int
ufs_readdir(void *v)
{
	struct vop_readdir_args /* {
		struct vnode	*a_vp;
		struct uio	*a_uio;
		kauth_cred_t	a_cred;
		int		*a_eofflag;
		off_t		**a_cookies;
		int		*ncookies;
	} */ *ap = v;
	struct vnode	*vp = ap->a_vp;
	struct direct	*cdp, *ecdp;
	struct dirent	*ndp;
	char		*cdbuf, *ndbuf, *endp;
	struct uio	auio, *uio;
	struct iovec	aiov;
	int		error;
	size_t		count, ccount, rcount, cdbufsz, ndbufsz;
	off_t		off, *ccp;
	off_t		startoff;
	size_t		skipbytes;
	struct ufsmount	*ump = VFSTOUFS(vp->v_mount);
	int nswap = UFS_MPNEEDSWAP(ump);
#if BYTE_ORDER == LITTLE_ENDIAN
	int needswap = ump->um_maxsymlinklen <= 0 && nswap == 0;
#else
	int needswap = ump->um_maxsymlinklen <= 0 && nswap != 0;
#endif
	uio = ap->a_uio;
	count = uio->uio_resid;
	rcount = count - ((uio->uio_offset + count) & (ump->um_dirblksiz - 1));

	if (rcount < _DIRENT_MINSIZE(cdp) || count < _DIRENT_MINSIZE(ndp))
		return EINVAL;

	startoff = uio->uio_offset & ~(ump->um_dirblksiz - 1);
	skipbytes = uio->uio_offset - startoff;
	rcount += skipbytes;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = startoff;
	auio.uio_resid = rcount;
	UIO_SETUP_SYSSPACE(&auio);
	auio.uio_rw = UIO_READ;
2361 cdbufsz = rcount; 2362 cdbuf = kmem_alloc(cdbufsz, KM_SLEEP); 2363 aiov.iov_base = cdbuf; 2364 aiov.iov_len = rcount; 2365 error = VOP_READ(vp, &auio, 0, ap->a_cred); 2366 if (error != 0) { 2367 kmem_free(cdbuf, cdbufsz); 2368 return error; 2369 } 2370 2371 rcount -= auio.uio_resid; 2372 2373 cdp = (struct direct *)(void *)cdbuf; 2374 ecdp = (struct direct *)(void *)&cdbuf[rcount]; 2375 2376 ndbufsz = count; 2377 ndbuf = kmem_alloc(ndbufsz, KM_SLEEP); 2378 ndp = (struct dirent *)(void *)ndbuf; 2379 endp = &ndbuf[count]; 2380 2381 off = uio->uio_offset; 2382 if (ap->a_cookies) { 2383 ccount = rcount / _DIRENT_RECLEN(cdp, 1); 2384 ccp = *(ap->a_cookies) = malloc(ccount * sizeof(*ccp), 2385 M_TEMP, M_WAITOK); 2386 } else { 2387 /* XXX: GCC */ 2388 ccount = 0; 2389 ccp = NULL; 2390 } 2391 2392 while (cdp < ecdp) { 2393 cdp->d_reclen = ufs_rw16(cdp->d_reclen, nswap); 2394 if (skipbytes > 0) { 2395 if (cdp->d_reclen <= skipbytes) { 2396 skipbytes -= cdp->d_reclen; 2397 cdp = _DIRENT_NEXT(cdp); 2398 continue; 2399 } 2400 /* 2401 * invalid cookie. 
2402 */ 2403 error = EINVAL; 2404 goto out; 2405 } 2406 if (cdp->d_reclen == 0) { 2407 struct dirent *ondp = ndp; 2408 ndp->d_reclen = _DIRENT_MINSIZE(ndp); 2409 ndp = _DIRENT_NEXT(ndp); 2410 ondp->d_reclen = 0; 2411 cdp = ecdp; 2412 break; 2413 } 2414 if (needswap) { 2415 ndp->d_type = cdp->d_namlen; 2416 ndp->d_namlen = cdp->d_type; 2417 } else { 2418 ndp->d_type = cdp->d_type; 2419 ndp->d_namlen = cdp->d_namlen; 2420 } 2421 ndp->d_reclen = _DIRENT_RECLEN(ndp, ndp->d_namlen); 2422 if ((char *)(void *)ndp + ndp->d_reclen + 2423 _DIRENT_MINSIZE(ndp) > endp) 2424 break; 2425 ndp->d_fileno = ufs_rw32(cdp->d_ino, nswap); 2426 (void)memcpy(ndp->d_name, cdp->d_name, ndp->d_namlen); 2427 memset(&ndp->d_name[ndp->d_namlen], 0, 2428 ndp->d_reclen - _DIRENT_NAMEOFF(ndp) - ndp->d_namlen); 2429 off += cdp->d_reclen; 2430 if (ap->a_cookies) { 2431 KASSERT(ccp - *(ap->a_cookies) < ccount); 2432 *(ccp++) = off; 2433 } 2434 ndp = _DIRENT_NEXT(ndp); 2435 cdp = _DIRENT_NEXT(cdp); 2436 } 2437 2438 count = ((char *)(void *)ndp - ndbuf); 2439 error = uiomove(ndbuf, count, uio); 2440out: 2441 if (ap->a_cookies) { 2442 if (error) { 2443 free(*(ap->a_cookies), M_TEMP); 2444 *(ap->a_cookies) = NULL; 2445 *(ap->a_ncookies) = 0; 2446 } else { 2447 *ap->a_ncookies = ccp - *(ap->a_cookies); 2448 } 2449 } 2450 uio->uio_offset = off; 2451 kmem_free(ndbuf, ndbufsz); 2452 kmem_free(cdbuf, cdbufsz); 2453 *ap->a_eofflag = VTOI(vp)->i_size <= uio->uio_offset; 2454 return error; 2455} 2456 2457/* 2458 * Return target name of a symbolic link 2459 */ 2460int 2461ufs_readlink(void *v) 2462{ 2463 struct vop_readlink_args /* { 2464 struct vnode *a_vp; 2465 struct uio *a_uio; 2466 kauth_cred_t a_cred; 2467 } */ *ap = v; 2468 struct vnode *vp = ap->a_vp; 2469 struct inode *ip = VTOI(vp); 2470 struct ufsmount *ump = VFSTOUFS(vp->v_mount); 2471 int isize; 2472 2473 isize = ip->i_size; 2474 if (isize < ump->um_maxsymlinklen || 2475 (ump->um_maxsymlinklen == 0 && DIP(ip, blocks) == 0)) { 2476 uiomove((char 
*)SHORTLINK(ip), isize, ap->a_uio); 2477 return (0); 2478 } 2479 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); 2480} 2481 2482/* 2483 * Calculate the logical to physical mapping if not done already, 2484 * then call the device strategy routine. 2485 */ 2486int 2487ufs_strategy(void *v) 2488{ 2489 struct vop_strategy_args /* { 2490 struct vnode *a_vp; 2491 struct buf *a_bp; 2492 } */ *ap = v; 2493 struct buf *bp; 2494 struct vnode *vp; 2495 struct inode *ip; 2496 struct mount *mp; 2497 int error; 2498 2499 bp = ap->a_bp; 2500 vp = ap->a_vp; 2501 ip = VTOI(vp); 2502 if (vp->v_type == VBLK || vp->v_type == VCHR) 2503 panic("ufs_strategy: spec"); 2504 KASSERT(bp->b_bcount != 0); 2505 if (bp->b_blkno == bp->b_lblkno) { 2506 error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, 2507 NULL); 2508 if (error) { 2509 bp->b_error = error; 2510 biodone(bp); 2511 return (error); 2512 } 2513 if (bp->b_blkno == -1) /* no valid data */ 2514 clrbuf(bp); 2515 } 2516 if (bp->b_blkno < 0) { /* block is not on disk */ 2517 biodone(bp); 2518 return (0); 2519 } 2520 vp = ip->i_devvp; 2521 2522 error = VOP_STRATEGY(vp, bp); 2523 if (error) 2524 return error; 2525 2526 if (!BUF_ISREAD(bp)) 2527 return 0; 2528 2529 mp = wapbl_vptomp(vp); 2530 if (mp == NULL || mp->mnt_wapbl_replay == NULL || 2531 !WAPBL_REPLAY_ISOPEN(mp) || 2532 !WAPBL_REPLAY_CAN_READ(mp, bp->b_blkno, bp->b_bcount)) 2533 return 0; 2534 2535 error = biowait(bp); 2536 if (error) 2537 return error; 2538 2539 error = WAPBL_REPLAY_READ(mp, bp->b_data, bp->b_blkno, bp->b_bcount); 2540 if (error) { 2541 mutex_enter(&bufcache_lock); 2542 SET(bp->b_cflags, BC_INVAL); 2543 mutex_exit(&bufcache_lock); 2544 } 2545 return error; 2546} 2547 2548/* 2549 * Print out the contents of an inode. 
2550 */ 2551int 2552ufs_print(void *v) 2553{ 2554 struct vop_print_args /* { 2555 struct vnode *a_vp; 2556 } */ *ap = v; 2557 struct vnode *vp; 2558 struct inode *ip; 2559 2560 vp = ap->a_vp; 2561 ip = VTOI(vp); 2562 printf("tag VT_UFS, ino %llu, on dev %llu, %llu", 2563 (unsigned long long)ip->i_number, 2564 (unsigned long long)major(ip->i_dev), 2565 (unsigned long long)minor(ip->i_dev)); 2566 printf(" flags 0x%x, nlink %d\n", 2567 ip->i_flag, ip->i_nlink); 2568 printf("\tmode 0%o, owner %d, group %d, size %qd", 2569 ip->i_mode, ip->i_uid, ip->i_gid, 2570 (long long)ip->i_size); 2571 if (vp->v_type == VFIFO) 2572 VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v); 2573 printf("\n"); 2574 return (0); 2575} 2576 2577/* 2578 * Read wrapper for special devices. 2579 */ 2580int 2581ufsspec_read(void *v) 2582{ 2583 struct vop_read_args /* { 2584 struct vnode *a_vp; 2585 struct uio *a_uio; 2586 int a_ioflag; 2587 kauth_cred_t a_cred; 2588 } */ *ap = v; 2589 2590 /* 2591 * Set access flag. 2592 */ 2593 if ((ap->a_vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0) 2594 VTOI(ap->a_vp)->i_flag |= IN_ACCESS; 2595 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap)); 2596} 2597 2598/* 2599 * Write wrapper for special devices. 2600 */ 2601int 2602ufsspec_write(void *v) 2603{ 2604 struct vop_write_args /* { 2605 struct vnode *a_vp; 2606 struct uio *a_uio; 2607 int a_ioflag; 2608 kauth_cred_t a_cred; 2609 } */ *ap = v; 2610 2611 /* 2612 * Set update and change flags. 2613 */ 2614 if ((ap->a_vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0) 2615 VTOI(ap->a_vp)->i_flag |= IN_MODIFY; 2616 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap)); 2617} 2618 2619/* 2620 * Close wrapper for special devices. 2621 * 2622 * Update the times on the inode then do device close. 
2623 */ 2624int 2625ufsspec_close(void *v) 2626{ 2627 struct vop_close_args /* { 2628 struct vnode *a_vp; 2629 int a_fflag; 2630 kauth_cred_t a_cred; 2631 } */ *ap = v; 2632 struct vnode *vp; 2633 struct inode *ip; 2634 2635 vp = ap->a_vp; 2636 ip = VTOI(vp); 2637 if (vp->v_usecount > 1) 2638 UFS_ITIMES(vp, NULL, NULL, NULL); 2639 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); 2640} 2641 2642/* 2643 * Read wrapper for fifo's 2644 */ 2645int 2646ufsfifo_read(void *v) 2647{ 2648 struct vop_read_args /* { 2649 struct vnode *a_vp; 2650 struct uio *a_uio; 2651 int a_ioflag; 2652 kauth_cred_t a_cred; 2653 } */ *ap = v; 2654 2655 /* 2656 * Set access flag. 2657 */ 2658 VTOI(ap->a_vp)->i_flag |= IN_ACCESS; 2659 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_read), ap)); 2660} 2661 2662/* 2663 * Write wrapper for fifo's. 2664 */ 2665int 2666ufsfifo_write(void *v) 2667{ 2668 struct vop_write_args /* { 2669 struct vnode *a_vp; 2670 struct uio *a_uio; 2671 int a_ioflag; 2672 kauth_cred_t a_cred; 2673 } */ *ap = v; 2674 2675 /* 2676 * Set update and change flags. 2677 */ 2678 VTOI(ap->a_vp)->i_flag |= IN_MODIFY; 2679 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_write), ap)); 2680} 2681 2682/* 2683 * Close wrapper for fifo's. 2684 * 2685 * Update the times on the inode then do device close. 2686 */ 2687int 2688ufsfifo_close(void *v) 2689{ 2690 struct vop_close_args /* { 2691 struct vnode *a_vp; 2692 int a_fflag; 2693 kauth_cred_t a_cred; 2694 } */ *ap = v; 2695 struct vnode *vp; 2696 struct inode *ip; 2697 2698 vp = ap->a_vp; 2699 ip = VTOI(vp); 2700 if (ap->a_vp->v_usecount > 1) 2701 UFS_ITIMES(vp, NULL, NULL, NULL); 2702 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); 2703} 2704 2705/* 2706 * Return POSIX pathconf information applicable to ufs filesystems. 
2707 */ 2708int 2709ufs_pathconf(void *v) 2710{ 2711 struct vop_pathconf_args /* { 2712 struct vnode *a_vp; 2713 int a_name; 2714 register_t *a_retval; 2715 } */ *ap = v; 2716 2717 switch (ap->a_name) { 2718 case _PC_LINK_MAX: 2719 *ap->a_retval = LINK_MAX; 2720 return (0); 2721 case _PC_NAME_MAX: 2722 *ap->a_retval = FFS_MAXNAMLEN; 2723 return (0); 2724 case _PC_PATH_MAX: 2725 *ap->a_retval = PATH_MAX; 2726 return (0); 2727 case _PC_PIPE_BUF: 2728 *ap->a_retval = PIPE_BUF; 2729 return (0); 2730 case _PC_CHOWN_RESTRICTED: 2731 *ap->a_retval = 1; 2732 return (0); 2733 case _PC_NO_TRUNC: 2734 *ap->a_retval = 1; 2735 return (0); 2736 case _PC_SYNC_IO: 2737 *ap->a_retval = 1; 2738 return (0); 2739 case _PC_FILESIZEBITS: 2740 *ap->a_retval = 42; 2741 return (0); 2742 case _PC_SYMLINK_MAX: 2743 *ap->a_retval = MAXPATHLEN; 2744 return (0); 2745 case _PC_2_SYMLINKS: 2746 *ap->a_retval = 1; 2747 return (0); 2748 default: 2749 return (EINVAL); 2750 } 2751 /* NOTREACHED */ 2752} 2753 2754/* 2755 * Advisory record locking support 2756 */ 2757int 2758ufs_advlock(void *v) 2759{ 2760 struct vop_advlock_args /* { 2761 struct vnode *a_vp; 2762 void * a_id; 2763 int a_op; 2764 struct flock *a_fl; 2765 int a_flags; 2766 } */ *ap = v; 2767 struct inode *ip; 2768 2769 ip = VTOI(ap->a_vp); 2770 return lf_advlock(ap, &ip->i_lockf, ip->i_size); 2771} 2772 2773/* 2774 * Initialize the vnode associated with a new inode, handle aliased 2775 * vnodes. 
2776 */ 2777void 2778ufs_vinit(struct mount *mntp, int (**specops)(void *), int (**fifoops)(void *), 2779 struct vnode **vpp) 2780{ 2781 struct timeval tv; 2782 struct inode *ip; 2783 struct vnode *vp; 2784 dev_t rdev; 2785 struct ufsmount *ump; 2786 2787 vp = *vpp; 2788 ip = VTOI(vp); 2789 switch(vp->v_type = IFTOVT(ip->i_mode)) { 2790 case VCHR: 2791 case VBLK: 2792 vp->v_op = specops; 2793 ump = ip->i_ump; 2794 if (ump->um_fstype == UFS1) 2795 rdev = (dev_t)ufs_rw32(ip->i_ffs1_rdev, 2796 UFS_MPNEEDSWAP(ump)); 2797 else 2798 rdev = (dev_t)ufs_rw64(ip->i_ffs2_rdev, 2799 UFS_MPNEEDSWAP(ump)); 2800 spec_node_init(vp, rdev); 2801 break; 2802 case VFIFO: 2803 vp->v_op = fifoops; 2804 break; 2805 case VNON: 2806 case VBAD: 2807 case VSOCK: 2808 case VLNK: 2809 case VDIR: 2810 case VREG: 2811 break; 2812 } 2813 if (ip->i_number == ROOTINO) 2814 vp->v_vflag |= VV_ROOT; 2815 /* 2816 * Initialize modrev times 2817 */ 2818 getmicrouptime(&tv); 2819 ip->i_modrev = (uint64_t)(uint)tv.tv_sec << 32 2820 | tv.tv_usec * 4294u; 2821 *vpp = vp; 2822} 2823 2824/* 2825 * Allocate a new inode. 2826 */ 2827int 2828ufs_makeinode(int mode, struct vnode *dvp, const struct ufs_lookup_results *ulr, 2829 struct vnode **vpp, struct componentname *cnp) 2830{ 2831 struct inode *ip, *pdir; 2832 struct direct *newdir; 2833 struct vnode *tvp; 2834 int error, ismember = 0; 2835 2836 UFS_WAPBL_JUNLOCK_ASSERT(dvp->v_mount); 2837 2838 pdir = VTOI(dvp); 2839 2840 if ((mode & IFMT) == 0) 2841 mode |= IFREG; 2842 2843 if ((error = UFS_VALLOC(dvp, mode, cnp->cn_cred, vpp)) != 0) { 2844 vput(dvp); 2845 return (error); 2846 } 2847 tvp = *vpp; 2848 ip = VTOI(tvp); 2849 ip->i_gid = pdir->i_gid; 2850 DIP_ASSIGN(ip, gid, ip->i_gid); 2851 ip->i_uid = kauth_cred_geteuid(cnp->cn_cred); 2852 DIP_ASSIGN(ip, uid, ip->i_uid); 2853 error = UFS_WAPBL_BEGIN1(dvp->v_mount, dvp); 2854 if (error) { 2855 /* 2856 * Note, we can't VOP_VFREE(tvp) here like we should 2857 * because we can't write to the disk. 
Instead, we leave 2858 * the vnode dangling from the journal. 2859 */ 2860 vput(tvp); 2861 vput(dvp); 2862 return (error); 2863 } 2864#if defined(QUOTA) || defined(QUOTA2) 2865 if ((error = chkiq(ip, 1, cnp->cn_cred, 0))) { 2866 UFS_VFREE(tvp, ip->i_number, mode); 2867 UFS_WAPBL_END1(dvp->v_mount, dvp); 2868 vput(tvp); 2869 vput(dvp); 2870 return (error); 2871 } 2872#endif 2873 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 2874 ip->i_mode = mode; 2875 DIP_ASSIGN(ip, mode, mode); 2876 tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ 2877 ip->i_nlink = 1; 2878 DIP_ASSIGN(ip, nlink, 1); 2879 if ((ip->i_mode & ISGID) && (kauth_cred_ismember_gid(cnp->cn_cred, 2880 ip->i_gid, &ismember) != 0 || !ismember) && 2881 kauth_authorize_generic(cnp->cn_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2882 ip->i_mode &= ~ISGID; 2883 DIP_ASSIGN(ip, mode, ip->i_mode); 2884 } 2885 2886 if (cnp->cn_flags & ISWHITEOUT) { 2887 ip->i_flags |= UF_OPAQUE; 2888 DIP_ASSIGN(ip, flags, ip->i_flags); 2889 } 2890 2891 /* 2892 * Make sure inode goes to disk before directory entry. 2893 */ 2894 if ((error = UFS_UPDATE(tvp, NULL, NULL, UPDATE_DIROP)) != 0) 2895 goto bad; 2896 newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK); 2897 ufs_makedirentry(ip, cnp, newdir); 2898 error = ufs_direnter(dvp, ulr, tvp, newdir, cnp, NULL); 2899 pool_cache_put(ufs_direct_cache, newdir); 2900 if (error) 2901 goto bad; 2902 vput(dvp); 2903 *vpp = tvp; 2904 return (0); 2905 2906 bad: 2907 /* 2908 * Write error occurred trying to update the inode 2909 * or the directory so must deallocate the inode. 2910 */ 2911 ip->i_nlink = 0; 2912 DIP_ASSIGN(ip, nlink, 0); 2913 ip->i_flag |= IN_CHANGE; 2914 /* If IN_ADIROP, account for it */ 2915 UFS_UNMARK_VNODE(tvp); 2916 UFS_WAPBL_UPDATE(tvp, NULL, NULL, 0); 2917 tvp->v_type = VNON; /* explodes later if VBLK */ 2918 UFS_WAPBL_END1(dvp->v_mount, dvp); 2919 vput(tvp); 2920 vput(dvp); 2921 return (error); 2922} 2923 2924/* 2925 * Allocate len bytes at offset off. 
2926 */ 2927int 2928ufs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags, 2929 kauth_cred_t cred) 2930{ 2931 struct inode *ip = VTOI(vp); 2932 int error, delta, bshift, bsize; 2933 UVMHIST_FUNC("ufs_gop_alloc"); UVMHIST_CALLED(ubchist); 2934 2935 error = 0; 2936 bshift = vp->v_mount->mnt_fs_bshift; 2937 bsize = 1 << bshift; 2938 2939 delta = off & (bsize - 1); 2940 off -= delta; 2941 len += delta; 2942 2943 while (len > 0) { 2944 bsize = MIN(bsize, len); 2945 2946 error = UFS_BALLOC(vp, off, bsize, cred, flags, NULL); 2947 if (error) { 2948 goto out; 2949 } 2950 2951 /* 2952 * increase file size now, UFS_BALLOC() requires that 2953 * EOF be up-to-date before each call. 2954 */ 2955 2956 if (ip->i_size < off + bsize) { 2957 UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x", 2958 vp, ip->i_size, off + bsize, 0); 2959 ip->i_size = off + bsize; 2960 DIP_ASSIGN(ip, size, ip->i_size); 2961 } 2962 2963 off += bsize; 2964 len -= bsize; 2965 } 2966 2967out: 2968 UFS_WAPBL_UPDATE(vp, NULL, NULL, 0); 2969 return error; 2970} 2971 2972void 2973ufs_gop_markupdate(struct vnode *vp, int flags) 2974{ 2975 u_int32_t mask = 0; 2976 2977 if ((flags & GOP_UPDATE_ACCESSED) != 0) { 2978 mask = IN_ACCESS; 2979 } 2980 if ((flags & GOP_UPDATE_MODIFIED) != 0) { 2981 if (vp->v_type == VREG) { 2982 mask |= IN_CHANGE | IN_UPDATE; 2983 } else { 2984 mask |= IN_MODIFY; 2985 } 2986 } 2987 if (mask) { 2988 struct inode *ip = VTOI(vp); 2989 2990 ip->i_flag |= mask; 2991 } 2992} 2993