xfs_vnodeops.c revision 159451
1/* 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#include "xfs.h" 20#include "xfs_fs.h" 21#include "xfs_types.h" 22#include "xfs_bit.h" 23#include "xfs_log.h" 24#include "xfs_inum.h" 25#include "xfs_trans.h" 26#include "xfs_sb.h" 27#include "xfs_ag.h" 28#include "xfs_dir.h" 29#include "xfs_dir2.h" 30#include "xfs_dmapi.h" 31#include "xfs_mount.h" 32#include "xfs_da_btree.h" 33#include "xfs_bmap_btree.h" 34#include "xfs_alloc_btree.h" 35#include "xfs_ialloc_btree.h" 36#include "xfs_dir_sf.h" 37#include "xfs_dir2_sf.h" 38#include "xfs_attr_sf.h" 39#include "xfs_dinode.h" 40#include "xfs_inode.h" 41#include "xfs_inode_item.h" 42#include "xfs_dir_leaf.h" 43#include "xfs_itable.h" 44#include "xfs_btree.h" 45#include "xfs_ialloc.h" 46#include "xfs_alloc.h" 47#include "xfs_bmap.h" 48#include "xfs_attr.h" 49#include "xfs_rw.h" 50#include "xfs_error.h" 51#include "xfs_quota.h" 52#include "xfs_utils.h" 53#include "xfs_rtalloc.h" 54#include "xfs_refcache.h" 55#include "xfs_trans_space.h" 56#include "xfs_log_priv.h" 57#include "xfs_mac.h" 58 59#include "xfs_fs.h" 60 61/* 62 * The maximum pathlen is 1024 bytes. Since the minimum file system 63 * blocksize is 512 bytes, we can get a max of 2 extents back from 64 * bmapi. 
65 */ 66#define SYMLINK_MAPS 2 67 68/* 69 * For xfs, we check that the file isn't too big to be opened by this kernel. 70 * No other open action is required for regular files. Devices are handled 71 * through the specfs file system, pipes through fifofs. Device and 72 * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively, 73 * when a new vnode is first looked up or created. 74 */ 75STATIC int 76xfs_open( 77 bhv_desc_t *bdp, 78 cred_t *credp) 79{ 80 int mode; 81 xfs_vnode_t *vp; 82 xfs_inode_t *ip; 83 84 vp = BHV_TO_VNODE(bdp); 85 ip = XFS_BHVTOI(bdp); 86 87 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 88 return XFS_ERROR(EIO); 89 90 /* 91 * If it's a directory with any blocks, read-ahead block 0 92 * as we're almost certain to have the next operation be a read there. 93 */ 94 if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) { 95 mode = xfs_ilock_map_shared(ip); 96 if (ip->i_d.di_nextents > 0) 97 (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); 98 xfs_iunlock(ip, mode); 99 } 100 return 0; 101} 102 103 104/* 105 * xfs_getattr 106 */ 107STATIC int 108xfs_getattr( 109 bhv_desc_t *bdp, 110 xfs_vattr_t *vap, 111 int flags, 112 cred_t *credp) 113{ 114 xfs_inode_t *ip; 115 xfs_mount_t *mp; 116 xfs_vnode_t *vp; 117 118 vp = BHV_TO_VNODE(bdp); 119 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 120 121 ip = XFS_BHVTOI(bdp); 122 mp = ip->i_mount; 123 124 if (XFS_FORCED_SHUTDOWN(mp)) 125 return XFS_ERROR(EIO); 126 127 if (!(flags & ATTR_LAZY)) 128 xfs_ilock(ip, XFS_ILOCK_SHARED); 129 130 vap->va_size = ip->i_d.di_size; 131 if (vap->va_mask == XFS_AT_SIZE) 132 goto all_done; 133 134 vap->va_nblocks = 135 XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); 136 vap->va_nodeid = ip->i_ino; 137#if XFS_BIG_INUMS 138 vap->va_nodeid += mp->m_inoadd; 139#endif 140 vap->va_nlink = ip->i_d.di_nlink; 141 142 /* 143 * Quick exit for non-stat callers 144 */ 145 if ((vap->va_mask & 146 ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID| 147 XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 
0) 148 goto all_done; 149 150 /* 151 * Copy from in-core inode. 152 */ 153 vap->va_mode = ip->i_d.di_mode; 154 vap->va_uid = ip->i_d.di_uid; 155 vap->va_gid = ip->i_d.di_gid; 156 vap->va_projid = ip->i_d.di_projid; 157 158 /* 159 * Check vnode type block/char vs. everything else. 160 */ 161 switch (ip->i_d.di_mode & S_IFMT) { 162 case S_IFBLK: 163 case S_IFCHR: 164 vap->va_rdev = ip->i_df.if_u2.if_rdev; 165 vap->va_blocksize = BLKDEV_IOSIZE; 166 break; 167 default: 168 vap->va_rdev = 0; 169 170 if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { 171 vap->va_blocksize = xfs_preferred_iosize(mp); 172 } else { 173 174 /* 175 * If the file blocks are being allocated from a 176 * realtime partition, then return the inode's 177 * realtime extent size or the realtime volume's 178 * extent size. 179 */ 180 vap->va_blocksize = ip->i_d.di_extsize ? 181 (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : 182 (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog); 183 } 184 break; 185 } 186 187 vn_atime_to_timespec(vp, &vap->va_atime); 188 vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec; 189 vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; 190 vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec; 191 vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; 192 193 /* 194 * Exit for stat callers. See if any of the rest of the fields 195 * to be filled in are needed. 196 */ 197 if ((vap->va_mask & 198 (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS| 199 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0) 200 goto all_done; 201 202 /* 203 * Convert di_flags to xflags. 204 */ 205 vap->va_xflags = xfs_ip2xflags(ip); 206 207 /* 208 * Exit for inode revalidate. See if any of the rest of 209 * the fields to be filled in are needed. 210 */ 211 if ((vap->va_mask & 212 (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS| 213 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0) 214 goto all_done; 215 216 vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog; 217 vap->va_nextents = 218 (ip->i_df.if_flags & XFS_IFEXTENTS) ? 
219 ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) : 220 ip->i_d.di_nextents; 221 if (ip->i_afp) 222 vap->va_anextents = 223 (ip->i_afp->if_flags & XFS_IFEXTENTS) ? 224 ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) : 225 ip->i_d.di_anextents; 226 else 227 vap->va_anextents = 0; 228 vap->va_gen = ip->i_d.di_gen; 229 230 all_done: 231 if (!(flags & ATTR_LAZY)) 232 xfs_iunlock(ip, XFS_ILOCK_SHARED); 233 return 0; 234} 235 236 237/* 238 * xfs_setattr 239 */ 240int 241xfs_setattr( 242 bhv_desc_t *bdp, 243 xfs_vattr_t *vap, 244 int flags, 245 cred_t *credp) 246{ 247 xfs_inode_t *ip; 248 xfs_trans_t *tp; 249 xfs_mount_t *mp; 250 int mask; 251 int code; 252 uint lock_flags; 253 uint commit_flags=0; 254 uid_t uid=0, iuid=0; 255 gid_t gid=0, igid=0; 256 int timeflags = 0; 257 xfs_vnode_t *vp; 258 xfs_prid_t projid=0, iprojid=0; 259 int mandlock_before, mandlock_after; 260 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 261 int file_owner; 262 int need_iolock = 1; 263 264 vp = BHV_TO_VNODE(bdp); 265 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 266 267 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 268 return XFS_ERROR(EROFS); 269 270 /* 271 * Cannot set certain attributes. 272 */ 273 mask = vap->va_mask; 274 if (mask & XFS_AT_NOSET) { 275 return XFS_ERROR(EINVAL); 276 } 277 278 ip = XFS_BHVTOI(bdp); 279 mp = ip->i_mount; 280 281 if (XFS_FORCED_SHUTDOWN(mp)) 282 return XFS_ERROR(EIO); 283 284 /* 285 * Timestamps do not need to be logged and hence do not 286 * need to be done within a transaction. 287 */ 288 if (mask & XFS_AT_UPDTIMES) { 289 ASSERT((mask & ~XFS_AT_UPDTIMES) == 0); 290 timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) | 291 ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) | 292 ((mask & XFS_AT_UPDMTIME) ? 
XFS_ICHGTIME_MOD : 0); 293 xfs_ichgtime(ip, timeflags); 294 return 0; 295 } 296 297 olddquot1 = olddquot2 = NULL; 298 udqp = gdqp = NULL; 299 300 /* 301 * If disk quotas is on, we make sure that the dquots do exist on disk, 302 * before we start any other transactions. Trying to do this later 303 * is messy. We don't care to take a readlock to look at the ids 304 * in inode here, because we can't hold it across the trans_reserve. 305 * If the IDs do change before we take the ilock, we're covered 306 * because the i_*dquot fields will get updated anyway. 307 */ 308 if (XFS_IS_QUOTA_ON(mp) && 309 (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) { 310 uint qflags = 0; 311 312 if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { 313 uid = vap->va_uid; 314 qflags |= XFS_QMOPT_UQUOTA; 315 } else { 316 uid = ip->i_d.di_uid; 317 } 318 if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) { 319 gid = vap->va_gid; 320 qflags |= XFS_QMOPT_GQUOTA; 321 } else { 322 gid = ip->i_d.di_gid; 323 } 324 if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) { 325 projid = vap->va_projid; 326 qflags |= XFS_QMOPT_PQUOTA; 327 } else { 328 projid = ip->i_d.di_projid; 329 } 330 /* 331 * We take a reference when we initialize udqp and gdqp, 332 * so it is important that we never blindly double trip on 333 * the same variable. See xfs_create() for an example. 334 */ 335 ASSERT(udqp == NULL); 336 ASSERT(gdqp == NULL); 337 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags, 338 &udqp, &gdqp); 339 if (code) 340 return code; 341 } 342 343 /* 344 * For the other attributes, we acquire the inode lock and 345 * first do an error checking pass. 346 */ 347 tp = NULL; 348 lock_flags = XFS_ILOCK_EXCL; 349 ASSERT(flags & ATTR_NOLOCK ? 
flags & ATTR_DMI : 1); 350 if (flags & ATTR_NOLOCK) 351 need_iolock = 0; 352 if (!(mask & XFS_AT_SIZE)) { 353 if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) || 354 (mp->m_flags & XFS_MOUNT_WSYNC)) { 355 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 356 commit_flags = 0; 357 if ((code = xfs_trans_reserve(tp, 0, 358 XFS_ICHANGE_LOG_RES(mp), 0, 359 0, 0))) { 360 lock_flags = 0; 361 goto error_return; 362 } 363 } 364 } else { 365 if (DM_EVENT_ENABLED (vp->v_vfsp, ip, DM_EVENT_TRUNCATE) && 366 !(flags & ATTR_DMI)) { 367 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; 368 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp, 369 vap->va_size, 0, dmflags, NULL); 370 if (code) { 371 lock_flags = 0; 372 goto error_return; 373 } 374 } 375 if (need_iolock) 376 lock_flags |= XFS_IOLOCK_EXCL; 377 } 378 379 xfs_ilock(ip, lock_flags); 380 381 /* boolean: are we the file owner? */ 382#if 0 383 file_owner = (current_fsuid(credp) == ip->i_d.di_uid); 384#endif 385 386 /* 387 * Change various properties of a file. 388 * Only the owner or users with CAP_FOWNER 389 * capability may do these things. 390 */ 391 if (mask & 392 (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID| 393 XFS_AT_GID|XFS_AT_PROJID)) { 394 /* 395 * CAP_FOWNER overrides the following restrictions: 396 * 397 * The user ID of the calling process must be equal 398 * to the file owner ID, except in cases where the 399 * CAP_FSETID capability is applicable. 400 */ 401 if (!file_owner && !capable(CAP_FOWNER)) { 402 code = XFS_ERROR(EPERM); 403 goto error_return; 404 } 405 406 /* 407 * CAP_FSETID overrides the following restrictions: 408 * 409 * The effective user ID of the calling process shall match 410 * the file owner when setting the set-user-ID and 411 * set-group-ID bits on that file. 
412 * 413 * The effective group ID or one of the supplementary group 414 * IDs of the calling process shall match the group owner of 415 * the file when setting the set-group-ID bit on that file 416 */ 417 if (mask & XFS_AT_MODE) { 418 mode_t m = 0; 419 420 if ((vap->va_mode & S_ISUID) && !file_owner) 421 m |= S_ISUID; 422 if ((vap->va_mode & S_ISGID) && 423 !groupmember((gid_t)ip->i_d.di_gid, credp)) 424 m |= S_ISGID; 425#if 1 426 /* Linux allows this, Irix doesn't. */ 427 if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp)) 428 m |= S_ISVTX; 429#endif 430 if (m && !capable(CAP_FSETID)) 431 vap->va_mode &= ~m; 432 } 433 } 434 435 /* 436 * Change file ownership. Must be the owner or privileged. 437 * If the system was configured with the "restricted_chown" 438 * option, the owner is not permitted to give away the file, 439 * and can change the group id only to a group of which he 440 * or she is a member. 441 */ 442 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 443 /* 444 * These IDs could have changed since we last looked at them. 445 * But, we're assured that if the ownership did change 446 * while we didn't have the inode locked, inode's dquot(s) 447 * would have changed also. 448 */ 449 iuid = ip->i_d.di_uid; 450 iprojid = ip->i_d.di_projid; 451 igid = ip->i_d.di_gid; 452 gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; 453 uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid; 454 455 projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid : 456 iprojid; 457 458 /* 459 * CAP_CHOWN overrides the following restrictions: 460 * 461 * If _POSIX_CHOWN_RESTRICTED is defined, this capability 462 * shall override the restriction that a process cannot 463 * change the user ID of a file it owns and the restriction 464 * that the group ID supplied to the chown() function 465 * shall be equal to either the group ID or one of the 466 * supplementary group IDs of the calling process. 
467 */ 468 if (restricted_chown && 469 (iuid != uid || (igid != gid && 470 !groupmember((gid_t)gid, credp))) && 471 !capable(CAP_CHOWN)) { 472 code = XFS_ERROR(EPERM); 473 goto error_return; 474 } 475 /* 476 * Do a quota reservation only if uid/projid/gid is actually 477 * going to change. 478 */ 479 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || 480 (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) || 481 (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { 482 ASSERT(tp); 483 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, 484 capable(CAP_FOWNER) ? 485 XFS_QMOPT_FORCE_RES : 0); 486 if (code) /* out of quota */ 487 goto error_return; 488 } 489 } 490 491 /* 492 * Truncate file. Must have write permission and not be a directory. 493 */ 494 if (mask & XFS_AT_SIZE) { 495 /* Short circuit the truncate case for zero length files */ 496 if ((vap->va_size == 0) && 497 (ip->i_d.di_size == 0) && (ip->i_d.di_nextents == 0)) { 498 xfs_iunlock(ip, XFS_ILOCK_EXCL); 499 lock_flags &= ~XFS_ILOCK_EXCL; 500 if (mask & XFS_AT_CTIME) 501 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 502 code = 0; 503 goto error_return; 504 } 505 506 if (VN_ISDIR(vp)) { 507 code = XFS_ERROR(EISDIR); 508 goto error_return; 509 } else if (!VN_ISREG(vp)) { 510 code = XFS_ERROR(EINVAL); 511 goto error_return; 512 } 513 /* 514 * Make sure that the dquots are attached to the inode. 515 */ 516 if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED))) 517 goto error_return; 518 } 519 520 /* 521 * Change file access or modified times. 522 */ 523 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 524 if (!file_owner) { 525 if ((flags & ATTR_UTIME) && 526 !capable(CAP_FOWNER)) { 527 code = XFS_ERROR(EPERM); 528 goto error_return; 529 } 530 } 531 } 532 533 /* 534 * Change extent size or realtime flag. 535 */ 536 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) { 537 /* 538 * Can't change extent size if any extents are allocated. 
539 */ 540 if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) && 541 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != 542 vap->va_extsize) ) { 543 code = XFS_ERROR(EINVAL); /* EFBIG? */ 544 goto error_return; 545 } 546 /* 547 * Can't change realtime flag if any extents are allocated. 548 */ 549 if ((ip->i_d.di_nextents || ip->i_delayed_blks) && 550 (mask & XFS_AT_XFLAGS) && 551 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 552 (vap->va_xflags & XFS_XFLAG_REALTIME)) { 553 code = XFS_ERROR(EINVAL); /* EFBIG? */ 554 goto error_return; 555 } 556 557 /* 558 * Extent size must be a multiple of the appropriate block 559 * size, if set at all. 560 */ 561 if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) { 562 xfs_extlen_t size; 563 564 if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) || 565 ((mask & XFS_AT_XFLAGS) && 566 (vap->va_xflags & XFS_XFLAG_REALTIME))) { 567 size = mp->m_sb.sb_rextsize << 568 mp->m_sb.sb_blocklog; 569 } else { 570 size = mp->m_sb.sb_blocksize; 571 } 572 if (vap->va_extsize % size) { 573 code = XFS_ERROR(EINVAL); 574 goto error_return; 575 } 576 } 577 /* 578 * If realtime flag is set then must have realtime data. 579 */ 580 if ((mask & XFS_AT_XFLAGS) && 581 (vap->va_xflags & XFS_XFLAG_REALTIME)) { 582 if ((mp->m_sb.sb_rblocks == 0) || 583 (mp->m_sb.sb_rextsize == 0) || 584 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { 585 code = XFS_ERROR(EINVAL); 586 goto error_return; 587 } 588 } 589 590 /* 591 * Can't modify an immutable/append-only file unless 592 * we have appropriate permission. 593 */ 594 if ((mask & XFS_AT_XFLAGS) && 595 (ip->i_d.di_flags & 596 (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || 597 (vap->va_xflags & 598 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && 599 !capable(CAP_LINUX_IMMUTABLE)) { 600 code = XFS_ERROR(EPERM); 601 goto error_return; 602 } 603 } 604 605 /* 606 * Now we can make the changes. 
Before we join the inode 607 * to the transaction, if XFS_AT_SIZE is set then take care of 608 * the part of the truncation that must be done without the 609 * inode lock. This needs to be done before joining the inode 610 * to the transaction, because the inode cannot be unlocked 611 * once it is a part of the transaction. 612 */ 613 if (mask & XFS_AT_SIZE) { 614 code = 0; 615 if ((vap->va_size > ip->i_d.di_size) && 616 (flags & ATTR_NOSIZETOK) == 0) { 617 code = xfs_igrow_start(ip, vap->va_size, credp); 618 } 619 xfs_iunlock(ip, XFS_ILOCK_EXCL); 620 vn_iowait(vp); /* wait for the completion of any pending DIOs */ 621 if (!code) 622 code = xfs_itruncate_data(ip, vap->va_size); 623 if (code) { 624 ASSERT(tp == NULL); 625 lock_flags &= ~XFS_ILOCK_EXCL; 626 ASSERT(lock_flags == XFS_IOLOCK_EXCL); 627 goto error_return; 628 } 629 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 630 if ((code = xfs_trans_reserve(tp, 0, 631 XFS_ITRUNCATE_LOG_RES(mp), 0, 632 XFS_TRANS_PERM_LOG_RES, 633 XFS_ITRUNCATE_LOG_COUNT))) { 634 xfs_trans_cancel(tp, 0); 635 if (need_iolock) 636 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 637 return code; 638 } 639 commit_flags = XFS_TRANS_RELEASE_LOG_RES; 640 xfs_ilock(ip, XFS_ILOCK_EXCL); 641 } 642 643 if (tp) { 644 xfs_trans_ijoin(tp, ip, lock_flags); 645 xfs_trans_ihold(tp, ip); 646 } 647 648 /* determine whether mandatory locking mode changes */ 649 mandlock_before = MANDLOCK(vp, ip->i_d.di_mode); 650 651 /* 652 * Truncate file. Must have write permission and not be a directory. 
653 */ 654 if (mask & XFS_AT_SIZE) { 655 if (vap->va_size > ip->i_d.di_size) { 656 xfs_igrow_finish(tp, ip, vap->va_size, 657 !(flags & ATTR_DMI)); 658 } else if ((vap->va_size <= ip->i_d.di_size) || 659 ((vap->va_size == 0) && ip->i_d.di_nextents)) { 660 /* 661 * signal a sync transaction unless 662 * we're truncating an already unlinked 663 * file on a wsync filesystem 664 */ 665 code = xfs_itruncate_finish(&tp, ip, 666 (xfs_fsize_t)vap->va_size, 667 XFS_DATA_FORK, 668 ((ip->i_d.di_nlink != 0 || 669 !(mp->m_flags & XFS_MOUNT_WSYNC)) 670 ? 1 : 0)); 671 if (code) { 672 goto abort_return; 673 } 674 } 675 /* 676 * Have to do this even if the file's size doesn't change. 677 */ 678 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 679 } 680 681 /* 682 * Change file access modes. 683 */ 684 if (mask & XFS_AT_MODE) { 685 ip->i_d.di_mode &= S_IFMT; 686 ip->i_d.di_mode |= vap->va_mode & ~S_IFMT; 687 688 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 689 timeflags |= XFS_ICHGTIME_CHG; 690 } 691 692 /* 693 * Change file ownership. Must be the owner or privileged. 694 * If the system was configured with the "restricted_chown" 695 * option, the owner is not permitted to give away the file, 696 * and can change the group id only to a group of which he 697 * or she is a member. 698 */ 699 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 700 /* 701 * CAP_FSETID overrides the following restrictions: 702 * 703 * The set-user-ID and set-group-ID bits of a file will be 704 * cleared upon successful return from chown() 705 */ 706 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && 707 !capable(CAP_FSETID)) { 708 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); 709 } 710 711 /* 712 * Change the ownerships and register quota modifications 713 * in the transaction. 
714 */ 715 if (iuid != uid) { 716 if (XFS_IS_UQUOTA_ON(mp)) { 717 ASSERT(mask & XFS_AT_UID); 718 ASSERT(udqp); 719 olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 720 &ip->i_udquot, udqp); 721 } 722 ip->i_d.di_uid = uid; 723 } 724 if (igid != gid) { 725 if (XFS_IS_GQUOTA_ON(mp)) { 726 ASSERT(!XFS_IS_PQUOTA_ON(mp)); 727 ASSERT(mask & XFS_AT_GID); 728 ASSERT(gdqp); 729 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 730 &ip->i_gdquot, gdqp); 731 } 732 ip->i_d.di_gid = gid; 733 } 734 if (iprojid != projid) { 735 if (XFS_IS_PQUOTA_ON(mp)) { 736 ASSERT(!XFS_IS_GQUOTA_ON(mp)); 737 ASSERT(mask & XFS_AT_PROJID); 738 ASSERT(gdqp); 739 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 740 &ip->i_gdquot, gdqp); 741 } 742 ip->i_d.di_projid = projid; 743 /* 744 * We may have to rev the inode as well as 745 * the superblock version number since projids didn't 746 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. 747 */ 748 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) 749 xfs_bump_ino_vers2(tp, ip); 750 } 751 752 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 753 timeflags |= XFS_ICHGTIME_CHG; 754 } 755 756 757 /* 758 * Change file access or modified times. 759 */ 760 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 761 if (mask & XFS_AT_ATIME) { 762 ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec; 763 ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec; 764 ip->i_update_core = 1; 765 //timeflags &= ~XFS_ICHGTIME_ACC; 766 } 767 if (mask & XFS_AT_MTIME) { 768 ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec; 769 ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec; 770 timeflags &= ~XFS_ICHGTIME_MOD; 771 timeflags |= XFS_ICHGTIME_CHG; 772 } 773 if (tp && (flags & ATTR_UTIME)) 774 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 775 } 776 777 /* 778 * Change XFS-added attributes. 779 */ 780 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) { 781 if (mask & XFS_AT_EXTSIZE) { 782 /* 783 * Converting bytes to fs blocks. 
784 */ 785 ip->i_d.di_extsize = vap->va_extsize >> 786 mp->m_sb.sb_blocklog; 787 } 788 if (mask & XFS_AT_XFLAGS) { 789 uint di_flags; 790 791 /* can't set PREALLOC this way, just preserve it */ 792 di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); 793 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) 794 di_flags |= XFS_DIFLAG_IMMUTABLE; 795 if (vap->va_xflags & XFS_XFLAG_APPEND) 796 di_flags |= XFS_DIFLAG_APPEND; 797 if (vap->va_xflags & XFS_XFLAG_SYNC) 798 di_flags |= XFS_DIFLAG_SYNC; 799 if (vap->va_xflags & XFS_XFLAG_NOATIME) 800 di_flags |= XFS_DIFLAG_NOATIME; 801 if (vap->va_xflags & XFS_XFLAG_NODUMP) 802 di_flags |= XFS_DIFLAG_NODUMP; 803 if (vap->va_xflags & XFS_XFLAG_PROJINHERIT) 804 di_flags |= XFS_DIFLAG_PROJINHERIT; 805 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 806 if (vap->va_xflags & XFS_XFLAG_RTINHERIT) 807 di_flags |= XFS_DIFLAG_RTINHERIT; 808 if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS) 809 di_flags |= XFS_DIFLAG_NOSYMLINKS; 810 if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT) 811 di_flags |= XFS_DIFLAG_EXTSZINHERIT; 812 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 813 if (vap->va_xflags & XFS_XFLAG_REALTIME) { 814 di_flags |= XFS_DIFLAG_REALTIME; 815 ip->i_iocore.io_flags |= XFS_IOCORE_RT; 816 } else { 817 ip->i_iocore.io_flags &= ~XFS_IOCORE_RT; 818 } 819 if (vap->va_xflags & XFS_XFLAG_EXTSIZE) 820 di_flags |= XFS_DIFLAG_EXTSIZE; 821 } 822 ip->i_d.di_flags = di_flags; 823 } 824 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 825 timeflags |= XFS_ICHGTIME_CHG; 826 } 827 828 /* 829 * Change file inode change time only if XFS_AT_CTIME set 830 * AND we have been called by a DMI function. 831 */ 832 833 if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) { 834 ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec; 835 ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec; 836 ip->i_update_core = 1; 837 timeflags &= ~XFS_ICHGTIME_CHG; 838 } 839 840 /* 841 * Send out timestamp changes that need to be set to the 842 * current time. Not done when called by a DMI function. 
843 */ 844 if (timeflags && !(flags & ATTR_DMI)) 845 xfs_ichgtime(ip, timeflags); 846 847 XFS_STATS_INC(xs_ig_attrchg); 848 849 /* 850 * If this is a synchronous mount, make sure that the 851 * transaction goes to disk before returning to the user. 852 * This is slightly sub-optimal in that truncates require 853 * two sync transactions instead of one for wsync filesystems. 854 * One for the truncate and one for the timestamps since we 855 * don't want to change the timestamps unless we're sure the 856 * truncate worked. Truncates are less than 1% of the laddis 857 * mix so this probably isn't worth the trouble to optimize. 858 */ 859 code = 0; 860 if (tp) { 861 if (mp->m_flags & XFS_MOUNT_WSYNC) 862 xfs_trans_set_sync(tp); 863 864 code = xfs_trans_commit(tp, commit_flags, NULL); 865 } 866 867 /* 868 * If the (regular) file's mandatory locking mode changed, then 869 * notify the vnode. We do this under the inode lock to prevent 870 * racing calls to vop_vnode_change. 871 */ 872 mandlock_after = MANDLOCK(vp, ip->i_d.di_mode); 873 if (mandlock_before != mandlock_after) { 874 XVOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING, 875 mandlock_after); 876 } 877 878 xfs_iunlock(ip, lock_flags); 879 880 /* 881 * Release any dquot(s) the inode had kept before chown. 
882 */ 883 XFS_QM_DQRELE(mp, olddquot1); 884 XFS_QM_DQRELE(mp, olddquot2); 885 XFS_QM_DQRELE(mp, udqp); 886 XFS_QM_DQRELE(mp, gdqp); 887 888 if (code) { 889 return code; 890 } 891 892 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_ATTRIBUTE) && 893 !(flags & ATTR_DMI)) { 894 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL, 895 NULL, DM_RIGHT_NULL, NULL, NULL, 896 0, 0, AT_DELAY_FLAG(flags)); 897 } 898 return 0; 899 900 abort_return: 901 commit_flags |= XFS_TRANS_ABORT; 902 /* FALLTHROUGH */ 903 error_return: 904 XFS_QM_DQRELE(mp, udqp); 905 XFS_QM_DQRELE(mp, gdqp); 906 if (tp) { 907 xfs_trans_cancel(tp, commit_flags); 908 } 909 if (lock_flags != 0) { 910 xfs_iunlock(ip, lock_flags); 911 } 912 return code; 913} 914 915 916/* 917 * xfs_access 918 * Null conversion from vnode mode bits to inode mode bits, as in efs. 919 */ 920STATIC int 921xfs_access( 922 bhv_desc_t *bdp, 923 int mode, 924 cred_t *credp) 925{ 926 xfs_inode_t *ip; 927 int error; 928 929 vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__, 930 (inst_t *)__return_address); 931 932 ip = XFS_BHVTOI(bdp); 933 xfs_ilock(ip, XFS_ILOCK_SHARED); 934 error = xfs_iaccess(ip, mode, credp); 935 xfs_iunlock(ip, XFS_ILOCK_SHARED); 936 return error; 937} 938 939 940/* 941 * xfs_readlink 942 * 943 */ 944STATIC int 945xfs_readlink( 946 bhv_desc_t *bdp, 947 uio_t *uiop, 948 int ioflags, 949 cred_t *credp) 950{ 951 xfs_inode_t *ip; 952 int count; 953 xfs_off_t offset; 954 int pathlen; 955 xfs_vnode_t *vp; 956 int error = 0; 957 xfs_mount_t *mp; 958 int nmaps; 959 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 960 xfs_daddr_t d; 961 int byte_cnt; 962 int n; 963 xfs_buf_t *bp; 964 965 vp = BHV_TO_VNODE(bdp); 966 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 967 968 ip = XFS_BHVTOI(bdp); 969 mp = ip->i_mount; 970 971 if (XFS_FORCED_SHUTDOWN(mp)) 972 return XFS_ERROR(EIO); 973 974 xfs_ilock(ip, XFS_ILOCK_SHARED); 975 976 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK); 977 978 offset = uiop->uio_offset; 979 
count = uiop->uio_resid; 980 981 if (offset < 0) { 982 error = XFS_ERROR(EINVAL); 983 goto error_return; 984 } 985 if (count <= 0) { 986 error = 0; 987 goto error_return; 988 } 989 990 /* 991 * See if the symlink is stored inline. 992 */ 993 pathlen = (int)ip->i_d.di_size; 994 995 if (ip->i_df.if_flags & XFS_IFINLINE) { 996 error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop); 997 } 998 else { 999 /* 1000 * Symlink not inline. Call bmap to get it in. 1001 */ 1002 nmaps = SYMLINK_MAPS; 1003 1004 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 1005 0, NULL, 0, mval, &nmaps, NULL, NULL); 1006 1007 if (error) { 1008 goto error_return; 1009 } 1010 1011 for (n = 0; n < nmaps; n++) { 1012 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 1013 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 1014 bp = xfs_buf_read(mp->m_ddev_targp, d, 1015 BTOBB(byte_cnt), 0); 1016 error = XFS_BUF_GETERROR(bp); 1017 if (error) { 1018 xfs_ioerror_alert("xfs_readlink", 1019 ip->i_mount, bp, XFS_BUF_ADDR(bp)); 1020 xfs_buf_relse(bp); 1021 goto error_return; 1022 } 1023 if (pathlen < byte_cnt) 1024 byte_cnt = pathlen; 1025 pathlen -= byte_cnt; 1026 1027 error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop); 1028 xfs_buf_relse (bp); 1029 } 1030 1031 } 1032 1033error_return: 1034 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1035 return error; 1036} 1037 1038 1039/* 1040 * xfs_fsync 1041 * 1042 * This is called to sync the inode and its data out to disk. 1043 * We need to hold the I/O lock while flushing the data, and 1044 * the inode lock while flushing the inode. The inode lock CANNOT 1045 * be held while flushing the data, so acquire after we're done 1046 * with that. 
 */
STATIC int
xfs_fsync(
	bhv_desc_t	*bdp,
	int		flag,
	cred_t		*credp,
	xfs_off_t	start,
	xfs_off_t	stop)
{
	xfs_inode_t	*ip;
	xfs_trans_t	*tp;
	int		error;
	/* changed==1 assumes dirty state; gates the barrier cache flush below */
	int		log_flushed = 0, changed = 1;

	vn_trace_entry(BHV_TO_VNODE(bdp),
		__FUNCTION__, (inst_t *)__return_address);

	ip = XFS_BHVTOI(bdp);

	/* stop == -1 means "to end of file"; other negatives are invalid */
	ASSERT(start >= 0 && stop >= -1);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return XFS_ERROR(EIO);

	/*
	 * We always need to make sure that the required inode state
	 * is safe on disk.  The vnode might be clean but because
	 * of committed transactions that haven't hit the disk yet.
	 * Likewise, there could be unflushed non-transactional
	 * changes to the inode core that have to go to disk.
	 *
	 * The following code depends on one assumption:  that
	 * any transaction that changes an inode logs the core
	 * because it has to change some field in the inode core
	 * (typically nextents or nblocks).  That assumption
	 * implies that any transactions against an inode will
	 * catch any non-transactional updates.  If inode-altering
	 * transactions exist that violate this assumption, the
	 * code breaks.  Right now, it figures that if the involved
	 * update_* field is clear and the inode is unpinned, the
	 * inode is clean.  Either it's been flushed or it's been
	 * committed and the commit has hit the disk unpinning the inode.
	 * (Note that xfs_inode_item_format() called at commit clears
	 * the update_* fields.)
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);

	/* If we are flushing data then we care about update_size
	 * being set, otherwise we care about update_core
	 */
	if ((flag & FSYNC_DATA) ?
			(ip->i_update_size == 0) :
			(ip->i_update_core == 0)) {
		/*
		 * Timestamps/size haven't changed since last inode
		 * flush or inode transaction commit.  That means
		 * either nothing got written or a transaction
		 * committed which caught the updates.	If the
		 * latter happened and the transaction hasn't
		 * hit the disk yet, the inode will be still
		 * be pinned.  If it is, force the log.
		 */

		xfs_iunlock(ip, XFS_ILOCK_SHARED);

		if (xfs_ipincount(ip)) {
			/* Inode is pinned by an in-core commit: force
			 * the log (synchronously iff FSYNC_WAIT). */
			_xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
				      XFS_LOG_FORCE |
				      ((flag & FSYNC_WAIT)
				       ? XFS_LOG_SYNC : 0),
				      &log_flushed);
		} else {
			/*
			 * If the inode is not pinned and nothing
			 * has changed we don't need to flush the
			 * cache.
			 */
			changed = 0;
		}
		error = 0;
	} else	{
		/*
		 * Kick off a transaction to log the inode
		 * core to get the updates.  Make it
		 * sync if FSYNC_WAIT is passed in (which
		 * is done by everybody but specfs).  The
		 * sync transaction will also force the log.
		 */
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
		if ((error = xfs_trans_reserve(tp, 0,
				XFS_FSYNC_TS_LOG_RES(ip->i_mount),
				0, 0, 0)))  {
			xfs_trans_cancel(tp, 0);
			return error;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);

		/*
		 * Note - it's possible that we might have pushed
		 * ourselves out of the way during trans_reserve
		 * which would flush the inode.	 But there's no
		 * guarantee that the inode buffer has actually
		 * gone out yet (it's delwri).	Plus the buffer
		 * could be pinned anyway if it's part of an
		 * inode in another recent transaction.	 So we
		 * play it safe and fire off the transaction anyway.
		 */
		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		if (flag & FSYNC_WAIT)
			xfs_trans_set_sync(tp);
		error = _xfs_trans_commit(tp, 0, NULL, &log_flushed);

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}

	if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) {
		/*
		 * If the log write didn't issue an ordered tag we need
		 * to flush the disk cache for the data device now.
		 */
		if (!log_flushed)
			xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);

		/*
		 * If this inode is on the RT dev we need to flush that
		 * cache as well.
		 */
		if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)
			xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
	}

	return error;
}

/*
 * This is called by xfs_inactive to free any blocks beyond eof,
 * when the link count isn't zero.
 *
 * Returns 0 if there was nothing to free or the truncate succeeded;
 * otherwise an error from the quota attach / reservation / truncate path.
 */
STATIC int
xfs_inactive_free_eofblocks(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_fileoff_t	end_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_filblks_t	map_len;
	int		nimaps;
	xfs_bmbt_irec_t	imap;

	/*
	 * Figure out if there are any blocks beyond the end
	 * of the file.  If not, then there is nothing to do.
	 */
	end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_d.di_size));
	last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
	map_len = last_fsb - end_fsb;
	if (map_len <= 0)
		return 0;

	nimaps = 1;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	/* Transactionless probe (tp == NULL) for any mapping past EOF. */
	error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0,
			  NULL, 0, &imap, &nimaps, NULL, NULL);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (!error && (nimaps != 0) &&
	    (imap.br_startblock != HOLESTARTBLOCK ||
	     ip->i_delayed_blks)) {
		/*
		 * Attach the dquots to the inode up front.
		 */
		if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
			return error;

		/*
		 * There are blocks after the end of file.
		 * Free them up now by truncating the file to
		 * its current size.
		 */
		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);

		/*
		 * Do the xfs_itruncate_start() call before
		 * reserving any log space because
		 * itruncate_start will call into the buffer
		 * cache and we can't
		 * do that within a transaction.
		 */
		xfs_ilock(ip, XFS_IOLOCK_EXCL);
		xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
				    ip->i_d.di_size);

		error = xfs_trans_reserve(tp, 0,
					  XFS_ITRUNCATE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_ITRUNCATE_LOG_COUNT);
		if (error) {
			/* Reservation only fails on forced shutdown here. */
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return error;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip,
				XFS_IOLOCK_EXCL |
				XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);

		/* Truncate to the current size: frees only the post-EOF blocks. */
		error = xfs_itruncate_finish(&tp, ip,
					     ip->i_d.di_size,
					     XFS_DATA_FORK,
					     0);
		/*
		 * If we get an error at this point we
		 * simply don't bother truncating the file.
		 */
		if (error) {
			xfs_trans_cancel(tp,
					 (XFS_TRANS_RELEASE_LOG_RES |
					  XFS_TRANS_ABORT));
		} else {
			error = xfs_trans_commit(tp,
						XFS_TRANS_RELEASE_LOG_RES,
						NULL);
		}
		xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	}
	return error;
}

/*
 * Free a symlink that has blocks associated with it.
 */
STATIC int
xfs_inactive_symlink_rmt(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_buf_t	*bp;
	int		committed;
	int		done;
	int		error;
	xfs_fsblock_t	first_block;
	xfs_bmap_free_t	free_list;
	int		i;
	xfs_mount_t	*mp;
	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
	int		nmaps;
	xfs_trans_t	*ntp;
	int		size;
	xfs_trans_t	*tp;

	tp = *tpp;
	mp = ip->i_mount;
	/* Remote-form symlink: target did not fit in the inode's data fork. */
	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink that has some
	 * blocks allocated to it.  Free the
	 * blocks here.  We know that we've got
	 * either 1 or 2 extents and that we can
	 * free them all in one bunmapi call.
	 */
	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		xfs_trans_cancel(tp, 0);
		*tpp = NULL;
		return error;
	}
	/*
	 * Lock the inode, fix the size, and join it to the transaction.
	 * Hold it so in the normal path, we still have it locked for
	 * the second transaction.  In the error paths we need it
	 * held so the cancel won't rele it, see below.
	 */
	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	size = (int)ip->i_d.di_size;
	ip->i_d.di_size = 0;
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Find the block(s) so we can inval and unmap them.
	 */
	done = 0;
	XFS_BMAP_INIT(&free_list, &first_block);
	nmaps = sizeof(mval) / sizeof(mval[0]);
	if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
			XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
			&free_list, NULL)))
		goto error0;
	/*
	 * Invalidate the block(s) so stale copies can't be written back.
	 */
	for (i = 0; i < nmaps; i++) {
		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
		xfs_trans_binval(tp, bp);
	}
	/*
	 * Unmap the dead block(s) to the free_list.
	 */
	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
			&first_block, &free_list, NULL, &done)))
		goto error1;
	ASSERT(done);
	/*
	 * Commit the first transaction.  This logs the EFI and the inode.
	 */
	if ((error = xfs_bmap_finish(&tp, &free_list, first_block, &committed)))
		goto error1;
	/*
	 * The transaction must have been committed, since there were
	 * actually extents freed by xfs_bunmapi.  See xfs_bmap_finish.
	 * The new tp has the extent freeing and EFDs.
	 */
	ASSERT(committed);
	/*
	 * The first xact was committed, so add the inode to the new one.
	 * Mark it dirty so it will be logged and moved forward in the log as
	 * part of every commit.
	 */
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Get a new, empty transaction to return to our caller.
	 */
	ntp = xfs_trans_dup(tp);
	/*
	 * Commit the transaction containing extent freeing and EFDs.
	 * If we get an error on the commit here or on the reserve below,
	 * we need to unlock the inode since the new transaction doesn't
	 * have the inode attached.
	 */
	error = xfs_trans_commit(tp, 0, NULL);
	tp = ntp;
	if (error) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * Remove the memory for extent descriptions (just bookkeeping).
	 */
	if (ip->i_df.if_bytes)
		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
	ASSERT(ip->i_df.if_bytes == 0);
	/*
	 * Put an itruncate log reservation in the new transaction
	 * for our caller.
	 */
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * Return with the inode locked but not joined to the transaction.
	 */
	*tpp = tp;
	return 0;

 error1:
	xfs_bmap_cancel(&free_list);
 error0:
	/*
	 * Have to come here with the inode locked and either
	 * (held and in the transaction) or (not in the transaction).
	 * If the inode isn't held then cancel would iput it, but
	 * that's wrong since this is inactive and the vnode ref
	 * count is 0 already.
	 * Cancel won't do anything to the inode if held, but it still
	 * needs to be locked until the cancel is done, if it was
	 * joined to the transaction.
	 */
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	*tpp = NULL;
	return error;

}

/*
 * Free a local-form symlink (target stored inline in the inode's data
 * fork).  On success *tpp holds a reserved transaction and the inode is
 * locked; on failure the transaction is cancelled and *tpp is NULL.
 */
STATIC int
xfs_inactive_symlink_local(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	int		error;

	ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink which fit into
	 * the inode.  Just free the memory used
	 * to hold the old symlink.
	 */
	error = xfs_trans_reserve(*tpp, 0,
				  XFS_ITRUNCATE_LOG_RES(ip->i_mount),
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_ITRUNCATE_LOG_COUNT);

	if (error) {
		xfs_trans_cancel(*tpp, 0);
		*tpp = NULL;
		return error;
	}
	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);

	/*
	 * Zero length symlinks _can_ exist.
	 */
	if (ip->i_df.if_bytes > 0) {
		xfs_idata_realloc(ip,
				  -(ip->i_df.if_bytes),
				  XFS_DATA_FORK);
		ASSERT(ip->i_df.if_bytes == 0);
	}
	return 0;
}

/*
 * Tear down the attribute fork during inactivation.  Commits the
 * caller's transaction (it can't be used for xfs_attr_inactive()),
 * removes the attribute fork, and returns a fresh transaction with an
 * inode-free reservation in *tpp.  On error the inode is unlocked and
 * *tpp is NULL.  Caller must hold the iolock in update mode.
 */
STATIC int
xfs_inactive_attrs(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_mount_t	*mp;

	ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
	tp = *tpp;
	mp = ip->i_mount;
	/* An attribute fork must actually exist. */
	ASSERT(ip->i_d.di_forkoff != 0);
	xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	error = xfs_attr_inactive(ip);
	if (error) {
		*tpp = NULL;
		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
		return error; /* goto out */
	}

	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	error = xfs_trans_reserve(tp, 0,
				  XFS_IFREE_LOG_RES(mp),
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_INACTIVE_LOG_COUNT);
	if (error) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		xfs_trans_cancel(tp, 0);
		*tpp = NULL;
		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	xfs_idestroy_fork(ip, XFS_ATTR_FORK);

	ASSERT(ip->i_d.di_anextents == 0);

	*tpp = tp;
	return 0;
}

/*
 * Called when a file reference is released.  For still-linked regular
 * files with blocks (and without PREALLOC/APPEND), frees speculative
 * allocations beyond EOF.  Skipped on read-only mounts (would do I/O).
 */
STATIC int
xfs_release(
	bhv_desc_t	*bdp)
{
	xfs_inode_t	*ip;
	xfs_vnode_t	*vp;
	xfs_mount_t	*mp;
	int		error;

	vp = BHV_TO_VNODE(bdp);
	ip = XFS_BHVTOI(bdp);

	if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) {
		return 0;
	}

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
		return 0;

#ifdef HAVE_REFCACHE
	/* If we are in the NFS reference cache then don't do this now */
	if (ip->i_refcache)
		return 0;
#endif

	mp = ip->i_mount;

	if (ip->i_d.di_nlink != 0) {
		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
		     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
		       ip->i_delayed_blks > 0)) &&
		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
		    (!(ip->i_d.di_flags &
				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
			if ((error = xfs_inactive_free_eofblocks(mp, ip)))
				return error;

#ifdef RMC   /* Update linux inode block count after free above */
			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
				ip->i_d.di_nblocks + ip->i_delayed_blks);
#endif
		}
	}

	return 0;
}

/*
 * xfs_inactive
 *
 * This is called when the vnode reference count for the vnode
 * goes to zero.  If the file has been unlinked, then it must
 * now be truncated.  Also, we clear all of the read-ahead state
 * kept for the inode here since the file is now closed.
 */
STATIC int
xfs_inactive(
	bhv_desc_t	*bdp,
	cred_t		*credp)
{
	xfs_inode_t	*ip;
	xfs_vnode_t	*vp;

	xfs_bmap_free_t	free_list;
	xfs_fsblock_t	first_block;
	int		committed;
	xfs_trans_t	*tp;
	xfs_mount_t	*mp;
	int		error;
	int		truncate;

	vp = BHV_TO_VNODE(bdp);
	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);

	ip = XFS_BHVTOI(bdp);

	/*
	 * If the inode is already free, then there can be nothing
	 * to clean up here.
	 */
	if (ip->i_d.di_mode == 0 || VN_BAD(vp)) {
		ASSERT(ip->i_df.if_real_bytes == 0);
		ASSERT(ip->i_df.if_broot_bytes == 0);
		return VN_INACTIVE_CACHE;
	}

	/*
	 * Only do a truncate if it's a regular file with
	 * some actual space in it.  It's OK to look at the
	 * inode's fields without the lock because we're the
	 * only one with a reference to the inode.
	 */
	truncate = ((ip->i_d.di_nlink == 0) &&
	    ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) ||
	     (ip->i_delayed_blks > 0)) &&
	    ((ip->i_d.di_mode & S_IFMT) == S_IFREG));

	mp = ip->i_mount;

	/* Notify DMAPI of the destroy before any teardown happens. */
	if (ip->i_d.di_nlink == 0 &&
	    DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_DESTROY)) {
		(void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL);
	}

	error = 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
		goto out;

	if (ip->i_d.di_nlink != 0) {
		/*
		 * Inode still linked: only trim speculative post-EOF
		 * allocations (same check as xfs_release, but also fires
		 * for PREALLOC/APPEND files that carry delayed blocks).
		 */
		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
		     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
		       ip->i_delayed_blks > 0)) &&
		      (ip->i_df.if_flags & XFS_IFEXTENTS) &&
		     (!(ip->i_d.di_flags &
				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
		      (ip->i_delayed_blks != 0)))) {
			if ((error = xfs_inactive_free_eofblocks(mp, ip)))
				return VN_INACTIVE_CACHE;
#ifdef RMC
			/* Update linux inode block count after free above */
			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
				ip->i_d.di_nblocks + ip->i_delayed_blks);
#endif
		}
		goto out;
	}

	ASSERT(ip->i_d.di_nlink == 0);

	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
		return VN_INACTIVE_CACHE;

	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	if (truncate) {
		/*
		 * Do the xfs_itruncate_start() call before
		 * reserving any log space because itruncate_start
		 * will call into the buffer cache and we can't
		 * do that within a transaction.
		 */
		xfs_ilock(ip, XFS_IOLOCK_EXCL);

		xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);

		error = xfs_trans_reserve(tp, 0,
					  XFS_ITRUNCATE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_ITRUNCATE_LOG_COUNT);
		if (error) {
			/* Don't call itruncate_cleanup */
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);

		/*
		 * normally, we have to run xfs_itruncate_finish sync.
		 * But if filesystem is wsync and we're in the inactive
		 * path, then we know that nlink == 0, and that the
		 * xaction that made nlink == 0 is permanently committed
		 * since xfs_remove runs as a synchronous transaction.
		 */
		error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
				(!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));

		if (error) {
			xfs_trans_cancel(tp,
				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}
	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {

		/*
		 * If we get an error while cleaning up a
		 * symlink we bail out.
		 */
		error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ?
			xfs_inactive_symlink_rmt(ip, &tp) :
			xfs_inactive_symlink_local(ip, &tp);

		if (error) {
			/* Both helpers NULL out the transaction on failure. */
			ASSERT(tp == NULL);
			return VN_INACTIVE_CACHE;
		}

		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
	} else {
		error = xfs_trans_reserve(tp, 0,
					  XFS_IFREE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_INACTIVE_LOG_COUNT);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
	}

	/*
	 * If there are attributes associated with the file
	 * then blow them away now.  The code calls a routine
	 * that recursively deconstructs the attribute fork.
	 * We need to just commit the current transaction
	 * because we can't use it for xfs_attr_inactive().
	 */
	if (ip->i_d.di_anextents > 0) {
		error = xfs_inactive_attrs(ip, &tp);
		/*
		 * If we got an error, the transaction is already
		 * cancelled, and the inode is unlocked. Just get out.
		 */
		 if (error)
			 return VN_INACTIVE_CACHE;
	} else if (ip->i_afp) {
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
	}

	/*
	 * Free the inode.
	 */
	XFS_BMAP_INIT(&free_list, &first_block);
	error = xfs_ifree(tp, ip, &free_list);
	if (error) {
		/*
		 * If we fail to free the inode, shut down.  The cancel
		 * might do that, we need to make sure.  Otherwise the
		 * inode might be lost for a long time or forever.
		 */
		if (!XFS_FORCED_SHUTDOWN(mp)) {
			cmn_err(CE_NOTE,
		"xfs_inactive:	xfs_ifree() returned an error = %d on %s",
				error, mp->m_fsname);
			xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR);
		}
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
	} else {
		/*
		 * Credit the quota account(s). The inode is gone.
		 */
		XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);

		/*
		 * Just ignore errors at this point.  There is
		 * nothing we can do except to try to keep going.
		 */
		(void) xfs_bmap_finish(&tp,  &free_list, first_block,
				       &committed);
		(void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
	}
	/*
	 * Release the dquots held by inode, if any.
	 */
	XFS_QM_DQDETACH(mp, ip);

	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);

 out:
	return VN_INACTIVE_CACHE;
}


/*
 * xfs_lookup
 *
 * Look up 'dentry' in directory 'dir_bdp'; on success *vpp holds the
 * vnode of the entry (referenced by xfs_dir_lookup_int).
 */
STATIC int
xfs_lookup(
	bhv_desc_t		*dir_bdp,
	vname_t			*dentry,
	xfs_vnode_t		**vpp,
	int			flags,
	xfs_vnode_t		*rdir,
	cred_t			*credp)
{
	xfs_inode_t		*dp, *ip;
	xfs_ino_t		e_inum;
	int			error;
	uint			lock_mode;
	xfs_vnode_t		*dir_vp;

	dir_vp = BHV_TO_VNODE(dir_bdp);
	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);

	dp = XFS_BHVTOI(dir_bdp);

	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
		return XFS_ERROR(EIO);

	lock_mode = xfs_ilock_map_shared(dp);
	error = xfs_dir_lookup_int(dir_bdp, lock_mode, dentry, &e_inum, &ip);
	if (!error) {
		*vpp = XFS_ITOV(ip);
		ITRACE(ip);
	}
	xfs_iunlock_map_shared(dp, lock_mode);
	return error;
}


/*
 * xfs_create (create a new file).
 */
STATIC int
xfs_create(
	bhv_desc_t		*dir_bdp,
	vname_t			*dentry,
	xfs_vattr_t		*vap,
	xfs_vnode_t		**vpp,
	cred_t			*credp)
{
	char			*name = VNAME(dentry);
	xfs_vnode_t		*dir_vp;
	xfs_inode_t		*dp, *ip;
	xfs_vnode_t	        *vp=NULL;
	xfs_trans_t		*tp;
	xfs_mount_t	        *mp;
	xfs_dev_t		rdev;
	int			error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	boolean_t		dp_joined_to_trans;
	int			dm_event_sent = 0;
	uint			cancel_flags;
	int			committed;
	xfs_prid_t		prid;
	struct xfs_dquot	*udqp, *gdqp;
	uint			resblks;
	int			dm_di_mode;
	int			namelen;

	ASSERT(!*vpp);
	dir_vp = BHV_TO_VNODE(dir_bdp);
	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);

	dp = XFS_BHVTOI(dir_bdp);
	mp = dp->i_mount;

	dm_di_mode = vap->va_mode;
	namelen = VNAMELEN(dentry);

	/* Pre-create DMAPI event; remember it so POSTCREATE is also sent. */
	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
				dir_vp, DM_RIGHT_NULL, NULL,
				DM_RIGHT_NULL, name, NULL,
				dm_di_mode, 0, 0);

		if (error)
			return error;
		dm_event_sent = 1;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* Return through std_return after this point. */

	udqp = gdqp = NULL;

	/* Project id: inherit from dir, else caller-supplied, else default. */
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = dp->i_d.di_projid;
	else if (vap->va_mask & XFS_AT_PROJID)
		prid = (xfs_prid_t)vap->va_projid;
	else
		prid = (xfs_prid_t)dfltprid;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = XFS_QM_DQVOPALLOC(mp, dp,
			current_fsuid(credp), current_fsgid(credp), prid,
			XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		goto std_return;

	ip = NULL;
	dp_joined_to_trans = B_FALSE;

	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	resblks = XFS_CREATE_SPACE_RES(mp, namelen);
	/*
	 * Initially assume that the file does not exist and
	 * reserve the resources for that case.  If that is not
	 * the case we'll drop the one we have and get a more
	 * appropriate transaction later.
	 */
	error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
	if (error == ENOSPC) {
		/* Retry with no block reservation (reservationless create). */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
	}
	if (error) {
		cancel_flags = 0;
		dp = NULL;
		goto error_return;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL);

	XFS_BMAP_INIT(&free_list, &first_block);

	ASSERT(ip == NULL);

	/*
	 * Reserve disk quota and the inode.
	 */
	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto error_return;

	/* With no block reservation, verify the entry will fit first. */
	if (resblks == 0 &&
	    (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen)))
		goto error_return;
	rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0;
	error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1,
			rdev, credp, prid, resblks > 0,
			&ip, &committed);
	if (error) {
		if (error == ENOSPC)
			goto error_return;
		goto abort_return;
	}
	ITRACE(ip);

	/*
	 * At this point, we've gotten a newly allocated inode.
	 * It is locked (and joined to the transaction).
	 */

	ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE));

	/*
	 * Now we join the directory inode to the transaction.
	 * We do not do it earlier because xfs_dir_ialloc
	 * might commit the previous transaction (and release
	 * all the locks).
	 */

	VN_HOLD(dir_vp);
	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	dp_joined_to_trans = B_TRUE;

	error = XFS_DIR_CREATENAME(mp, tp, dp, name, namelen, ip->i_ino,
			&first_block, &free_list,
			resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
	if (error) {
		ASSERT(error != ENOSPC);
		goto abort_return;
	}
	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	/*
	 * If this is a synchronous mount, make sure that the
	 * create transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	dp->i_gen++;

	/*
	 * Attach the dquot(s) to the inodes and modify them incore.
	 * These ids of the inode couldn't have changed since the new
	 * inode has been locked ever since it was created.
	 */
	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);

	/*
	 * xfs_trans_commit normally decrements the vnode ref count
	 * when it unlocks the inode. Since we want to return the
	 * vnode to the caller, we bump the vnode ref count now.
	 */
	IHOLD(ip);
	vp = XFS_ITOV(ip);

	error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
	if (error) {
		xfs_bmap_cancel(&free_list);
		goto abort_rele;
	}

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
	if (error) {
		IRELE(ip);
		tp = NULL;
		goto error_return;
	}

	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	/*
	 * Propagate the fact that the vnode changed after the
	 * xfs_inode locks have been released.
	 */
	XVOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3);

	*vpp = vp;

	/* Fallthrough to std_return with error = 0  */

std_return:
	/* Send POSTCREATE on success, or on failure after CREATE was sent. */
	if ( (*vpp || (error != 0 && dm_event_sent != 0)) &&
			DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
							DM_EVENT_POSTCREATE)) {
		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
			dir_vp, DM_RIGHT_NULL,
			*vpp ? vp:NULL,
			DM_RIGHT_NULL, name, NULL,
			dm_di_mode, error, 0);
	}
	return error;

 abort_return:
	cancel_flags |= XFS_TRANS_ABORT;
	/* FALLTHROUGH */

 error_return:
	if (tp != NULL)
		xfs_trans_cancel(tp, cancel_flags);

	if (!dp_joined_to_trans && (dp != NULL))
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	goto std_return;

 abort_rele:
	/*
	 * Wait until after the current transaction is aborted to
	 * release the inode.  This prevents recursive transactions
	 * and deadlocks from xfs_inactive.
	 */
	cancel_flags |= XFS_TRANS_ABORT;
	xfs_trans_cancel(tp, cancel_flags);
	IRELE(ip);

	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	goto std_return;
}

#ifdef DEBUG
/*
 * Some counters to see if (and how often) we are hitting some deadlock
 * prevention code paths.
 */

int xfs_rm_locks;
int xfs_rm_lock_delays;
int xfs_rm_attempts;
#endif

/*
 * The following routine will lock the inodes associated with the
 * directory and the named entry in the directory. The locks are
 * acquired in increasing inode number.
 *
 * If the entry is "..", then only the directory is locked. The
 * vnode ref count will still include that from the .. entry in
 * this case.
 *
 * There is a deadlock we need to worry about. If the locked directory is
 * in the AIL, it might be blocking up the log.
The next inode we lock
 * could be already locked by another thread waiting for log space (e.g
 * a permanent log reservation with a long running transaction (see
 * xfs_itruncate_finish)). To solve this, we must check if the directory
 * is in the ail and use lock_nowait. If we can't lock, we need to
 * drop the inode lock on the directory and try again. xfs_iunlock will
 * potentially push the tail if we were holding up the log.
 */
STATIC int
xfs_lock_dir_and_entry(
	xfs_inode_t	*dp,
	vname_t		*dentry,
	xfs_inode_t	*ip)	/* inode of entry 'name' */
{
	int		attempts;
	xfs_ino_t	e_inum;
	xfs_inode_t	*ips[2];
	xfs_log_item_t	*lp;

#ifdef DEBUG
	xfs_rm_locks++;
#endif
	attempts = 0;

again:
	xfs_ilock(dp, XFS_ILOCK_EXCL);

	e_inum = ip->i_ino;

	ITRACE(ip);

	/*
	 * We want to lock in increasing inum. Since we've already
	 * acquired the lock on the directory, we may need to release
	 * it if the inum of the entry turns out to be less.
	 */
	if (e_inum > dp->i_ino) {
		/*
		 * We are already in the right order, so just
		 * lock on the inode of the entry.
		 * We need to use nowait if dp is in the AIL.
		 */

		lp = (xfs_log_item_t *)dp->i_itemp;
		if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
			if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
				attempts++;
#ifdef DEBUG
				xfs_rm_attempts++;
#endif

				/*
				 * Unlock dp and try again.
				 * xfs_iunlock will try to push the tail
				 * if the inode is in the AIL.
				 */

				xfs_iunlock(dp, XFS_ILOCK_EXCL);

				if ((attempts % 5) == 0) {
					delay(1); /* Don't just spin the CPU */
#ifdef DEBUG
					xfs_rm_lock_delays++;
#endif
				}
				goto again;
			}
		} else {
			xfs_ilock(ip, XFS_ILOCK_EXCL);
		}
	} else if (e_inum < dp->i_ino) {
		/* Entry has the smaller inum: drop dp and take both in order. */
		xfs_iunlock(dp, XFS_ILOCK_EXCL);

		ips[0] = ip;
		ips[1] = dp;
		xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);
	}
	/* else	 e_inum == dp->i_ino */
	/*     This can happen if we're asked to lock /x/..
	 *     the entry is "..", which is also the parent directory.
	 */

	return 0;
}

#ifdef	DEBUG
int xfs_locked_n;
int xfs_small_retries;
int xfs_middle_retries;
int xfs_lots_retries;
int xfs_lock_delays;
#endif

/*
 * The following routine will lock n inodes in exclusive mode.
 * We assume the caller calls us with the inodes in i_ino order.
 *
 * We need to detect deadlock where an inode that we lock
 * is in the AIL and we start waiting for another inode that is locked
 * by a thread in a long running transaction (such as truncate). This can
 * result in deadlock since the long running trans might need to wait
 * for the inode we just locked in order to push the tail and free space
 * in the log.
 */
void
xfs_lock_inodes(
	xfs_inode_t	**ips,
	int		inodes,
	int		first_locked,
	uint		lock_mode)
{
	int		attempts = 0, i, j, try_lock;
	xfs_log_item_t	*lp;

	ASSERT(ips && (inodes >= 2)); /* we need at least two */

	if (first_locked) {
		/* Caller already holds ips[0]; start trylocking from ips[1]. */
		try_lock = 1;
		i = 1;
	} else {
		try_lock = 0;
		i = 0;
	}

again:
	for (; i < inodes; i++) {
		ASSERT(ips[i]);

		if (i && (ips[i] == ips[i-1]))	/* Already locked */
			continue;

		/*
		 * If try_lock is not set yet, make sure all locked inodes
		 * are not in the AIL.
		 * If any are, set try_lock to be used later.
		 */

		if (!try_lock) {
			for (j = (i - 1); j >= 0 && !try_lock; j--) {
				lp = (xfs_log_item_t *)ips[j]->i_itemp;
				if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
					try_lock++;
				}
			}
		}

		/*
		 * If any of the previous locks we have locked is in the AIL,
		 * we must TRY to get the second and subsequent locks. If
		 * we can't get any, we must release all we have
		 * and try again.
		 */

		if (try_lock) {
			/* try_lock must be 0 if i is 0. */
			/*
			 * try_lock means we have an inode locked
			 * that is in the AIL.
			 */
			ASSERT(i != 0);
			if (!xfs_ilock_nowait(ips[i], lock_mode)) {
				attempts++;

				/*
				 * Unlock all previous guys and try again.
				 * xfs_iunlock will try to push the tail
				 * if the inode is in the AIL.
				 */

				for(j = i - 1; j >= 0; j--) {

					/*
					 * Check to see if we've already
					 * unlocked this one.
					 * Not the first one going back,
					 * and the inode ptr is the same.
					 */
					if ((j != (i - 1)) && ips[j] ==
								ips[j+1])
						continue;

					xfs_iunlock(ips[j], lock_mode);
				}

				if ((attempts % 5) == 0) {
					delay(1); /* Don't just spin the CPU */
#ifdef DEBUG
					xfs_lock_delays++;
#endif
				}
				i = 0;
				try_lock = 0;
				goto again;
			}
		} else {
			xfs_ilock(ips[i], lock_mode);
		}
	}

#ifdef DEBUG
	if (attempts) {
		if (attempts < 5) xfs_small_retries++;
		else if (attempts < 100) xfs_middle_retries++;
		else xfs_lots_retries++;
	} else {
		xfs_locked_n++;
	}
#endif
}

#ifdef	DEBUG
#define	REMOVE_DEBUG_TRACE(x)	{remove_which_error_return = (x);}
int remove_which_error_return = 0;
#else /* ! DEBUG */
#define	REMOVE_DEBUG_TRACE(x)
#endif	/* !
DEBUG */ 2323 2324extern int xfs_remove(bhv_desc_t *, bhv_desc_t *, vname_t *, cred_t *); 2325/* 2326 * xfs_remove 2327 * 2328 */ 2329int 2330xfs_remove( 2331 bhv_desc_t *dir_bdp, 2332 bhv_desc_t *vp_bdp, 2333 vname_t *dentry, 2334 cred_t *credp) 2335{ 2336 xfs_vnode_t *dir_vp; 2337 xfs_vnode_t *xvp; 2338 char *name = VNAME(dentry); 2339 xfs_inode_t *dp, *ip; 2340 xfs_trans_t *tp = NULL; 2341 xfs_mount_t *mp; 2342 int error = 0; 2343 xfs_bmap_free_t free_list; 2344 xfs_fsblock_t first_block; 2345 int cancel_flags; 2346 int committed; 2347 int dm_di_mode = 0; 2348 int link_zero; 2349 uint resblks; 2350 int namelen; 2351 2352 dir_vp = BHV_TO_VNODE(dir_bdp); 2353 xvp = BHV_TO_VNODE(vp_bdp); 2354 2355 printf("xfs_remove: dvp %p vp %p\n",dir_vp,xvp); 2356 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 2357 2358 dp = XFS_BHVTOI(dir_bdp); 2359 mp = dp->i_mount; 2360 2361 if (XFS_FORCED_SHUTDOWN(mp)) 2362 return XFS_ERROR(EIO); 2363 2364 namelen = VNAMELEN(dentry); 2365 2366 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { 2367 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, 2368 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 2369 name, NULL, 0, 0, 0); 2370 if (error) 2371 return error; 2372 } 2373 2374 /* From this point on, return through std_return */ 2375 ip = NULL; 2376 2377 /* 2378 * We need to get a reference to ip before we get our log 2379 * reservation. The reason for this is that we cannot call 2380 * xfs_iget for an inode for which we do not have a reference 2381 * once we've acquired a log reservation. This is because the 2382 * inode we are trying to get might be in xfs_inactive going 2383 * for a log reservation. Since we'll have to wait for the 2384 * inactive code to complete before returning from xfs_iget, 2385 * we need to make sure that we don't have log space reserved 2386 * when we call xfs_iget. Instead we get an unlocked reference 2387 * to the inode before getting our log reservation. 
2388 */ 2389#ifdef RMC 2390 error = xfs_get_dir_entry(dentry, &ip); 2391#endif 2392 /* FreeBSD has already done the lookup */ 2393 ip = xvp->v_inode; 2394 VN_HOLD(xvp); 2395 2396 if (error) { 2397 REMOVE_DEBUG_TRACE(__LINE__); 2398 goto std_return; 2399 } 2400 2401 dm_di_mode = ip->i_d.di_mode; 2402 2403 vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 2404 2405 ITRACE(ip); 2406 2407 error = XFS_QM_DQATTACH(mp, dp, 0); 2408 if (!error && dp != ip) 2409 error = XFS_QM_DQATTACH(mp, ip, 0); 2410 if (error) { 2411 REMOVE_DEBUG_TRACE(__LINE__); 2412 IRELE(ip); 2413 goto std_return; 2414 } 2415 2416 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); 2417 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2418 /* 2419 * We try to get the real space reservation first, 2420 * allowing for directory btree deletion(s) implying 2421 * possible bmap insert(s). If we can't get the space 2422 * reservation then we use 0 instead, and avoid the bmap 2423 * btree insert(s) in the directory code by, if the bmap 2424 * insert tries to happen, instead trimming the LAST 2425 * block from the directory. 2426 */ 2427 resblks = XFS_REMOVE_SPACE_RES(mp); 2428 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 2429 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2430 if (error == ENOSPC) { 2431 resblks = 0; 2432 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 2433 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2434 } 2435 if (error) { 2436 ASSERT(error != ENOSPC); 2437 REMOVE_DEBUG_TRACE(__LINE__); 2438 xfs_trans_cancel(tp, 0); 2439 IRELE(ip); 2440 return error; 2441 } 2442 2443 error = xfs_lock_dir_and_entry(dp, dentry, ip); 2444 if (error) { 2445 REMOVE_DEBUG_TRACE(__LINE__); 2446 xfs_trans_cancel(tp, cancel_flags); 2447 IRELE(ip); 2448 goto std_return; 2449 } 2450 2451 /* 2452 * At this point, we've gotten both the directory and the entry 2453 * inodes locked. 
2454 */ 2455 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2456 if (dp != ip) { 2457 /* 2458 * Increment vnode ref count only in this case since 2459 * there's an extra vnode reference in the case where 2460 * dp == ip. 2461 */ 2462 IHOLD(dp); 2463 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2464 } 2465 2466 /* 2467 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2468 */ 2469 XFS_BMAP_INIT(&free_list, &first_block); 2470 error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, ip->i_ino, 2471 &first_block, &free_list, 0); 2472 if (error) { 2473 ASSERT(error != ENOENT); 2474 REMOVE_DEBUG_TRACE(__LINE__); 2475 goto error1; 2476 } 2477 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2478 2479 dp->i_gen++; 2480 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2481 2482 error = xfs_droplink(tp, ip); 2483 if (error) { 2484 REMOVE_DEBUG_TRACE(__LINE__); 2485 goto error1; 2486 } 2487 2488 /* Determine if this is the last link while 2489 * we are in the transaction. 2490 */ 2491 link_zero = (ip)->i_d.di_nlink==0; 2492 2493 /* 2494 * Take an extra ref on the inode so that it doesn't 2495 * go to xfs_inactive() from within the commit. 2496 */ 2497 IHOLD(ip); 2498 2499 /* 2500 * If this is a synchronous mount, make sure that the 2501 * remove transaction goes to disk before returning to 2502 * the user. 2503 */ 2504 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2505 xfs_trans_set_sync(tp); 2506 } 2507 2508 error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); 2509 if (error) { 2510 REMOVE_DEBUG_TRACE(__LINE__); 2511 goto error_rele; 2512 } 2513 2514 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2515 if (error) { 2516 IRELE(ip); 2517 goto std_return; 2518 } 2519 2520 /* 2521 * Before we drop our extra reference to the inode, purge it 2522 * from the refcache if it is there. 
By waiting until afterwards 2523 * to do the IRELE, we ensure that we won't go inactive in the 2524 * xfs_refcache_purge_ip routine (although that would be OK). 2525 */ 2526 xfs_refcache_purge_ip(ip); 2527 2528 vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 2529 2530 /* 2531 * Let interposed file systems know about removed links. 2532 */ 2533 XVOP_LINK_REMOVED(XFS_ITOV(ip), dir_vp, link_zero); 2534 2535 IRELE(ip); 2536 2537/* Fall through to std_return with error = 0 */ 2538 std_return: 2539 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, 2540 DM_EVENT_POSTREMOVE)) { 2541 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 2542 dir_vp, DM_RIGHT_NULL, 2543 NULL, DM_RIGHT_NULL, 2544 name, NULL, dm_di_mode, error, 0); 2545 } 2546 return error; 2547 2548 error1: 2549 xfs_bmap_cancel(&free_list); 2550 cancel_flags |= XFS_TRANS_ABORT; 2551 xfs_trans_cancel(tp, cancel_flags); 2552 goto std_return; 2553 2554 error_rele: 2555 /* 2556 * In this case make sure to not release the inode until after 2557 * the current transaction is aborted. Releasing it beforehand 2558 * can cause us to go to xfs_inactive and start a recursive 2559 * transaction which can easily deadlock with the current one. 2560 */ 2561 xfs_bmap_cancel(&free_list); 2562 cancel_flags |= XFS_TRANS_ABORT; 2563 xfs_trans_cancel(tp, cancel_flags); 2564 2565 /* 2566 * Before we drop our extra reference to the inode, purge it 2567 * from the refcache if it is there. By waiting until afterwards 2568 * to do the IRELE, we ensure that we won't go inactive in the 2569 * xfs_refcache_purge_ip routine (although that would be OK). 
2570 */ 2571 xfs_refcache_purge_ip(ip); 2572 2573 IRELE(ip); 2574 2575 goto std_return; 2576} 2577 2578 2579/* 2580 * xfs_link 2581 * 2582 */ 2583STATIC int 2584xfs_link( 2585 bhv_desc_t *target_dir_bdp, 2586 xfs_vnode_t *src_vp, 2587 vname_t *dentry, 2588 cred_t *credp) 2589{ 2590 xfs_inode_t *tdp, *sip; 2591 xfs_trans_t *tp; 2592 xfs_mount_t *mp; 2593 xfs_inode_t *ips[2]; 2594 int error; 2595 xfs_bmap_free_t free_list; 2596 xfs_fsblock_t first_block; 2597 int cancel_flags; 2598 int committed; 2599 xfs_vnode_t *target_dir_vp; 2600 int resblks; 2601 char *target_name = VNAME(dentry); 2602 int target_namelen; 2603 2604 target_dir_vp = BHV_TO_VNODE(target_dir_bdp); 2605 vn_trace_entry(target_dir_vp, __FUNCTION__, (inst_t *)__return_address); 2606 vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); 2607 2608 target_namelen = VNAMELEN(dentry); 2609 if (VN_ISDIR(src_vp)) 2610 return XFS_ERROR(EPERM); 2611 2612 sip = xfs_vtoi(src_vp); 2613 tdp = XFS_BHVTOI(target_dir_bdp); 2614 mp = tdp->i_mount; 2615 if (XFS_FORCED_SHUTDOWN(mp)) 2616 return XFS_ERROR(EIO); 2617 2618 if (DM_EVENT_ENABLED(src_vp->v_vfsp, tdp, DM_EVENT_LINK)) { 2619 error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK, 2620 target_dir_vp, DM_RIGHT_NULL, 2621 src_vp, DM_RIGHT_NULL, 2622 target_name, NULL, 0, 0, 0); 2623 if (error) 2624 return error; 2625 } 2626 2627 /* Return through std_return after this point. 
 */

	/* Attach quota structures to both inodes before reserving log space. */
	error = XFS_QM_DQATTACH(mp, sip, 0);
	if (!error && sip != tdp)
		error = XFS_QM_DQATTACH(mp, tdp, 0);
	if (error)
		goto std_return;

	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	resblks = XFS_LINK_SPACE_RES(mp, target_namelen);
	error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
	if (error == ENOSPC) {
		/* Retry with no block reservation; directory code copes. */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
	}
	if (error) {
		cancel_flags = 0;
		goto error_return;
	}

	/* Lock the two inodes in ascending inode-number order (deadlock avoidance). */
	if (sip->i_ino < tdp->i_ino) {
		ips[0] = sip;
		ips[1] = tdp;
	} else {
		ips[0] = tdp;
		ips[1] = sip;
	}

	xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);

	/*
	 * Increment vnode ref counts since xfs_trans_commit &
	 * xfs_trans_cancel will both unlock the inodes and
	 * decrement the associated ref counts.
	 */
	VN_HOLD(src_vp);
	VN_HOLD(target_dir_vp);
	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);

	/*
	 * If the source has too many links, we can't make any more to it.
	 */
	if (sip->i_d.di_nlink >= XFS_MAXLINK) {
		error = XFS_ERROR(EMLINK);
		goto error_return;
	}

	/*
	 * If we are using project inheritance, we only allow hard link
	 * creation in our tree when the project IDs are the same; else
	 * the tree quota mechanism could be circumvented.
	 */
	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
		     (tdp->i_d.di_projid != sip->i_d.di_projid))) {
		error = XFS_ERROR(EPERM);
		goto error_return;
	}

	if (resblks == 0 &&
	    (error = XFS_DIR_CANENTER(mp, tp, tdp, target_name,
			target_namelen)))
		goto error_return;

	XFS_BMAP_INIT(&free_list, &first_block);

	error = XFS_DIR_CREATENAME(mp, tp, tdp, target_name, target_namelen,
				   sip->i_ino, &first_block, &free_list,
				   resblks);
	if (error)
		goto abort_return;
	xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	tdp->i_gen++;
	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);

	error = xfs_bumplink(tp, sip);
	if (error) {
		goto abort_return;
	}

	/*
	 * If this is a synchronous mount, make sure that the
	 * link transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_bmap_finish (&tp, &free_list, first_block, &committed);
	if (error) {
		xfs_bmap_cancel(&free_list);
		goto abort_return;
	}

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
	if (error) {
		goto std_return;
	}

	/* Fall through to std_return with error = 0.
	 */
std_return:
	if (DM_EVENT_ENABLED(src_vp->v_vfsp, sip,
						DM_EVENT_POSTLINK)) {
		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK,
				target_dir_vp, DM_RIGHT_NULL,
				src_vp, DM_RIGHT_NULL,
				target_name, NULL, 0, error, 0);
	}
	return error;

 abort_return:
	cancel_flags |= XFS_TRANS_ABORT;
	/* FALLTHROUGH */

 error_return:
	xfs_trans_cancel(tp, cancel_flags);
	goto std_return;
}
/*
 * xfs_mkdir
 *
 */
STATIC int
xfs_mkdir(
	bhv_desc_t		*dir_bdp,
	vname_t			*dentry,
	xfs_vattr_t		*vap,
	xfs_vnode_t		**vpp,
	cred_t			*credp)
{
	char			*dir_name = VNAME(dentry);
	xfs_inode_t		*dp;
	xfs_inode_t		*cdp;	/* inode of created dir */
	xfs_vnode_t		*cvp;	/* vnode of created dir */
	xfs_trans_t		*tp;
	xfs_mount_t		*mp;
	int			cancel_flags;
	int			error;
	int			committed;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	xfs_vnode_t		*dir_vp;
	boolean_t		dp_joined_to_trans;
	boolean_t		created = B_FALSE;
	int			dm_event_sent = 0;
	xfs_prid_t		prid;
	struct xfs_dquot	*udqp, *gdqp;
	uint			resblks;
	int			dm_di_mode;
	int			dir_namelen;

	dir_vp = BHV_TO_VNODE(dir_bdp);
	dp = XFS_BHVTOI(dir_bdp);
	mp = dp->i_mount;

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	dir_namelen = VNAMELEN(dentry);

	tp = NULL;
	dp_joined_to_trans = B_FALSE;
	dm_di_mode = vap->va_mode;

	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
					dir_vp, DM_RIGHT_NULL, NULL,
					DM_RIGHT_NULL, dir_name, NULL,
					dm_di_mode, 0, 0);
		if (error)
			return error;
		dm_event_sent = 1;
	}

	/* Return through std_return after this point.
 */

	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);

	mp = dp->i_mount;
	udqp = gdqp = NULL;

	/* Pick the project id: inherited, caller-supplied, or the default. */
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = dp->i_d.di_projid;
	else if (vap->va_mask & XFS_AT_PROJID)
		prid = (xfs_prid_t)vap->va_projid;
	else
		prid = (xfs_prid_t)dfltprid;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = XFS_QM_DQVOPALLOC(mp, dp,
			current_fsuid(credp), current_fsgid(credp), prid,
			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		goto std_return;

	tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	resblks = XFS_MKDIR_SPACE_RES(mp, dir_namelen);
	error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0,
				  XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT);
	if (error == ENOSPC) {
		/* Retry with no block reservation. */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_MKDIR_LOG_RES(mp), 0,
					  XFS_TRANS_PERM_LOG_RES,
					  XFS_MKDIR_LOG_COUNT);
	}
	if (error) {
		cancel_flags = 0;
		dp = NULL;	/* so error path skips unlocking dp */
		goto error_return;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL);

	/*
	 * Check for directory link count overflow.
	 */
	if (dp->i_d.di_nlink >= XFS_MAXLINK) {
		error = XFS_ERROR(EMLINK);
		goto error_return;
	}

	/*
	 * Reserve disk quota and the inode.
	 */
	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto error_return;

	if (resblks == 0 &&
	    (error = XFS_DIR_CANENTER(mp, tp, dp, dir_name, dir_namelen)))
		goto error_return;
	/*
	 * create the directory inode.
	 */
	error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2,
			0, credp, prid, resblks > 0,
		&cdp, NULL);
	if (error) {
		if (error == ENOSPC)
			goto error_return;
		goto abort_return;
	}
	ITRACE(cdp);

	/*
	 * Now we add the directory inode to the transaction.
	 * We waited until now since xfs_dir_ialloc might start
	 * a new transaction.  Had we joined the transaction
	 * earlier, the locks might have gotten released.
	 */
	VN_HOLD(dir_vp);
	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	dp_joined_to_trans = B_TRUE;

	XFS_BMAP_INIT(&free_list, &first_block);

	error = XFS_DIR_CREATENAME(mp, tp, dp, dir_name, dir_namelen,
			cdp->i_ino, &first_block, &free_list,
			resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
	if (error) {
		ASSERT(error != ENOSPC);
		goto error1;
	}
	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

	/*
	 * Bump the in memory version number of the parent directory
	 * so that other processes accessing it will recognize that
	 * the directory has changed.
	 */
	dp->i_gen++;

	/* Create "." and ".." entries in the new directory. */
	error = XFS_DIR_INIT(mp, tp, cdp, dp);
	if (error) {
		goto error2;
	}

	cdp->i_gen = 1;
	error = xfs_bumplink(tp, dp);
	if (error) {
		goto error2;
	}

	cvp = XFS_ITOV(cdp);

	created = B_TRUE;

	*vpp = cvp;
	IHOLD(cdp);

	/*
	 * Attach the dquots to the new inode and modify the icount incore.
	 */
	XFS_QM_DQVOPCREATE(mp, tp, cdp, udqp, gdqp);

	/*
	 * If this is a synchronous mount, make sure that the
	 * mkdir transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
	if (error) {
		IRELE(cdp);
		goto error2;
	}

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);
	if (error) {
		IRELE(cdp);
	}

	/* Fall through to std_return with error = 0 or errno from
	 * xfs_trans_commit. */

std_return:
	if ( (created || (error != 0 && dm_event_sent != 0)) &&
			DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
						DM_EVENT_POSTCREATE)) {
		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
					dir_vp, DM_RIGHT_NULL,
					created ? XFS_ITOV(cdp):NULL,
					DM_RIGHT_NULL,
					dir_name, NULL,
					dm_di_mode, error, 0);
	}
	return error;

 error2:
 error1:
	xfs_bmap_cancel(&free_list);
 abort_return:
	cancel_flags |= XFS_TRANS_ABORT;
 error_return:
	xfs_trans_cancel(tp, cancel_flags);
	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	if (!dp_joined_to_trans && (dp != NULL)) {
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
	}

	goto std_return;
}


/*
 * xfs_rmdir
 *
 */
STATIC int
xfs_rmdir(
	bhv_desc_t		*dir_bdp,
	vname_t			*dentry,
	cred_t			*credp)
{
	char			*name = VNAME(dentry);
	xfs_inode_t		*dp;
	xfs_inode_t		*cdp;	/* child directory */
	xfs_trans_t		*tp;
	xfs_mount_t		*mp;
	int			error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	int			cancel_flags;
	int			committed;
	xfs_vnode_t		*dir_vp;
	int			dm_di_mode = 0;
	int			last_cdp_link;
	int			namelen;
	uint			resblks;

	dir_vp = BHV_TO_VNODE(dir_bdp);
	dp = XFS_BHVTOI(dir_bdp);
	mp = dp->i_mount;

	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);

	if
 (XFS_FORCED_SHUTDOWN(XFS_BHVTOI(dir_bdp)->i_mount))
		return XFS_ERROR(EIO);
	namelen = VNAMELEN(dentry);

	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) {
		error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
					dir_vp, DM_RIGHT_NULL,
					NULL, DM_RIGHT_NULL,
					name, NULL, 0, 0, 0);
		if (error)
			return XFS_ERROR(error);
	}

	/* Return through std_return after this point. */

	cdp = NULL;

	/*
	 * We need to get a reference to cdp before we get our log
	 * reservation.  The reason for this is that we cannot call
	 * xfs_iget for an inode for which we do not have a reference
	 * once we've acquired a log reservation.  This is because the
	 * inode we are trying to get might be in xfs_inactive going
	 * for a log reservation.  Since we'll have to wait for the
	 * inactive code to complete before returning from xfs_iget,
	 * we need to make sure that we don't have log space reserved
	 * when we call xfs_iget.  Instead we get an unlocked reference
	 * to the inode before getting our log reservation.
	 */
	error = xfs_get_dir_entry(dentry, &cdp);
	if (error) {
		REMOVE_DEBUG_TRACE(__LINE__);
		goto std_return;
	}
	mp = dp->i_mount;
	dm_di_mode = cdp->i_d.di_mode;

	/*
	 * Get the dquots for the inodes.
	 */
	error = XFS_QM_DQATTACH(mp, dp, 0);
	if (!error && dp != cdp)
		error = XFS_QM_DQATTACH(mp, cdp, 0);
	if (error) {
		IRELE(cdp);
		REMOVE_DEBUG_TRACE(__LINE__);
		goto std_return;
	}

	tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	/*
	 * We try to get the real space reservation first,
	 * allowing for directory btree deletion(s) implying
	 * possible bmap insert(s).  If we can't get the space
	 * reservation then we use 0 instead, and avoid the bmap
	 * btree insert(s) in the directory code by, if the bmap
	 * insert tries to happen, instead trimming the LAST
	 * block from the directory.
	 */
	resblks = XFS_REMOVE_SPACE_RES(mp);
	error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
	if (error == ENOSPC) {
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
	}
	if (error) {
		ASSERT(error != ENOSPC);
		cancel_flags = 0;
		IRELE(cdp);
		goto error_return;
	}
	XFS_BMAP_INIT(&free_list, &first_block);

	/*
	 * Now lock the child directory inode and the parent directory
	 * inode in the proper order.  This will take care of validating
	 * that the directory entry for the child directory inode has
	 * not changed while we were obtaining a log reservation.
	 */
	error = xfs_lock_dir_and_entry(dp, dentry, cdp);
	if (error) {
		xfs_trans_cancel(tp, cancel_flags);
		IRELE(cdp);
		goto std_return;
	}

	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	if (dp != cdp) {
		/*
		 * Only increment the parent directory vnode count if
		 * we didn't bump it in looking up cdp.  The only time
		 * we don't bump it is when we're looking up ".".
		 */
		VN_HOLD(dir_vp);
	}

	ITRACE(cdp);
	xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);

	/* An empty directory has exactly 2 links: "." and the parent's. */
	ASSERT(cdp->i_d.di_nlink >= 2);
	if (cdp->i_d.di_nlink != 2) {
		error = XFS_ERROR(ENOTEMPTY);
		goto error_return;
	}
	if (!XFS_DIR_ISEMPTY(mp, cdp)) {
		error = XFS_ERROR(ENOTEMPTY);
		goto error_return;
	}

	error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, cdp->i_ino,
		&first_block, &free_list, resblks);
	if (error) {
		goto error1;
	}

	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

	/*
	 * Bump the in memory generation count on the parent
	 * directory so that other can know that it has changed.
	 */
	dp->i_gen++;

	/*
	 * Drop the link from cdp's "..".
	 */
	error = xfs_droplink(tp, dp);
	if (error) {
		goto error1;
	}

	/*
	 * Drop the link from dp to cdp.
	 */
	error = xfs_droplink(tp, cdp);
	if (error) {
		goto error1;
	}

	/*
	 * Drop the "." link from cdp to self.
	 */
	error = xfs_droplink(tp, cdp);
	if (error) {
		goto error1;
	}

	/* Determine these before committing transaction */
	last_cdp_link = (cdp)->i_d.di_nlink==0;

	/*
	 * Take an extra ref on the child vnode so that it
	 * does not go to xfs_inactive() from within the commit.
	 */
	IHOLD(cdp);

	/*
	 * If this is a synchronous mount, make sure that the
	 * rmdir transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_bmap_finish (&tp, &free_list, first_block, &committed);
	if (error) {
		xfs_bmap_cancel(&free_list);
		xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
				 XFS_TRANS_ABORT));
		IRELE(cdp);
		goto std_return;
	}

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
	if (error) {
		IRELE(cdp);
		goto std_return;
	}


	/*
	 * Let interposed file systems know about removed links.
	 */
	XVOP_LINK_REMOVED(XFS_ITOV(cdp), dir_vp, last_cdp_link);

	IRELE(cdp);

	/* Fall through to std_return with error = 0 or the errno
	 * from xfs_trans_commit. */
 std_return:
	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_POSTREMOVE)) {
		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
					dir_vp, DM_RIGHT_NULL,
					NULL, DM_RIGHT_NULL,
					name, NULL, dm_di_mode,
					error, 0);
	}
	return error;

 error1:
	xfs_bmap_cancel(&free_list);
	cancel_flags |= XFS_TRANS_ABORT;
	/* FALLTHROUGH */

 error_return:
	xfs_trans_cancel(tp, cancel_flags);
	goto std_return;
}


/*
 * xfs_readdir
 *
 * Read dp's entries starting at uiop->uio_offset and translate them into
 * bufsize bytes worth of struct dirents starting at bufbase.
3236 */ 3237STATIC int 3238xfs_readdir( 3239 bhv_desc_t *dir_bdp, 3240 uio_t *uiop, 3241 cred_t *credp, 3242 int *eofp) 3243{ 3244 xfs_inode_t *dp; 3245 xfs_trans_t *tp = NULL; 3246 int error = 0; 3247 uint lock_mode; 3248 3249 vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__, 3250 (inst_t *)__return_address); 3251 dp = XFS_BHVTOI(dir_bdp); 3252 3253 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) { 3254 return XFS_ERROR(EIO); 3255 } 3256 3257 lock_mode = xfs_ilock_map_shared(dp); 3258 error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp); 3259 xfs_iunlock_map_shared(dp, lock_mode); 3260 return error; 3261} 3262 3263 3264/* 3265 * xfs_symlink 3266 * 3267 */ 3268STATIC int 3269xfs_symlink( 3270 bhv_desc_t *dir_bdp, 3271 vname_t *dentry, 3272 xfs_vattr_t *vap, 3273 char *target_path, 3274 xfs_vnode_t **vpp, 3275 cred_t *credp) 3276{ 3277 xfs_trans_t *tp; 3278 xfs_mount_t *mp; 3279 xfs_inode_t *dp; 3280 xfs_inode_t *ip; 3281 int error; 3282 int pathlen; 3283 xfs_bmap_free_t free_list; 3284 xfs_fsblock_t first_block; 3285 boolean_t dp_joined_to_trans; 3286 xfs_vnode_t *dir_vp; 3287 uint cancel_flags; 3288 int committed; 3289 xfs_fileoff_t first_fsb; 3290 xfs_filblks_t fs_blocks; 3291 int nmaps; 3292 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 3293 xfs_daddr_t d; 3294 char *cur_chunk; 3295 int byte_cnt; 3296 int n; 3297 xfs_buf_t *bp; 3298 xfs_prid_t prid; 3299 struct xfs_dquot *udqp, *gdqp; 3300 uint resblks; 3301 char *link_name = VNAME(dentry); 3302 int link_namelen; 3303 struct thread *current = curthread; 3304 3305 *vpp = NULL; 3306 dir_vp = BHV_TO_VNODE(dir_bdp); 3307 dp = XFS_BHVTOI(dir_bdp); 3308 dp_joined_to_trans = B_FALSE; 3309 error = 0; 3310 ip = NULL; 3311 tp = NULL; 3312 3313 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 3314 3315 mp = dp->i_mount; 3316 3317 if (XFS_FORCED_SHUTDOWN(mp)) 3318 return XFS_ERROR(EIO); 3319 3320 link_namelen = VNAMELEN(dentry); 3321 3322 /* 3323 * Check component lengths of the target path name. 
 */
	pathlen = strlen(target_path);
	if (pathlen >= MAXPATHLEN)	/* total string too long */
		return XFS_ERROR(ENAMETOOLONG);
	if (pathlen >= MAXNAMELEN) {	/* is any component too long? */
		int len, total;
		char *path;

		for(total = 0, path = target_path; total < pathlen;) {
			/*
			 * Skip any slashes.
			 */
			while(*path == '/') {
				total++;
				path++;
			}

			/*
			 * Count up to the next slash or end of path.
			 * Error out if the component is bigger than MAXNAMELEN.
			 */
			for(len = 0; *path != '/' && total < pathlen;total++, path++) {
				if (++len >= MAXNAMELEN) {
					error = ENAMETOOLONG;
					return error;
				}
			}
		}
	}

	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_SYMLINK)) {
		error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp,
					DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
					link_name, target_path, 0, 0, 0);
		if (error)
			return error;
	}

	/* Return through std_return after this point. */

	udqp = gdqp = NULL;

#ifdef XXXKAN
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = dp->i_d.di_projid;
	else if (vap->va_mask & XFS_AT_PROJID)
		prid = (xfs_prid_t)vap->va_projid;
	else
#endif
		prid = (xfs_prid_t)dfltprid;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = XFS_QM_DQVOPALLOC(mp, dp,
			current->td_ucred->cr_uid,
			current->td_ucred->cr_groups[0],
			prid,
			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		goto std_return;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	/*
	 * The symlink will fit into the inode data fork?
	 * There can't be any attributes so we get the whole variable part.
	 */
	if (pathlen <= XFS_LITINO(mp))
		fs_blocks = 0;
	else
		fs_blocks = XFS_B_TO_FSB(mp, pathlen);
	resblks = XFS_SYMLINK_SPACE_RES(mp, link_namelen, fs_blocks);
	error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
	if (error == ENOSPC && fs_blocks == 0) {
		/* Inline symlink needs no data blocks; retry unreserved. */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
	}
	if (error) {
		cancel_flags = 0;
		dp = NULL;	/* so error path skips unlocking dp */
		goto error_return;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL);

	/*
	 * Check whether the directory allows new symlinks or not.
	 */
	if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
		error = XFS_ERROR(EPERM);
		goto error_return;
	}

	/*
	 * Reserve disk quota : blocks and inode.
	 */
	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto error_return;

	/*
	 * Check for ability to enter directory entry, if no space reserved.
	 */
	if (resblks == 0 &&
	    (error = XFS_DIR_CANENTER(mp, tp, dp, link_name, link_namelen)))
		goto error_return;
	/*
	 * Initialize the bmap freelist prior to calling either
	 * bmapi or the directory create code.
	 */
	XFS_BMAP_INIT(&free_list, &first_block);

	/*
	 * Allocate an inode for the symlink.
	 */
	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (vap->va_mode&~S_IFMT),
			       1, 0, credp, prid, resblks > 0, &ip, NULL);
	if (error) {
		if (error == ENOSPC)
			goto error_return;
		goto error1;
	}
	ITRACE(ip);

	VN_HOLD(dir_vp);
	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	dp_joined_to_trans = B_TRUE;

	/*
	 * Also attach the dquot(s) to it, if applicable.
	 */
	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);

	if (resblks)
		resblks -= XFS_IALLOC_SPACE_RES(mp);
	/*
	 * If the symlink will fit into the inode, write it inline.
	 */
	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
		ip->i_d.di_size = pathlen;

		/*
		 * The inode was initially created in extent format.
		 */
		ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
		ip->i_df.if_flags |= XFS_IFINLINE;

		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);

	} else {
		/* Remote symlink: allocate blocks and copy path into them. */
		first_fsb = 0;
		nmaps = SYMLINK_MAPS;

		error = xfs_bmapi(tp, ip, first_fsb, fs_blocks,
				  XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
				  &first_block, resblks, mval, &nmaps,
				  &free_list, NULL);
		if (error) {
			goto error1;
		}

		if (resblks)
			resblks -= fs_blocks;
		ip->i_d.di_size = pathlen;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

		cur_chunk = target_path;
		for (n = 0; n < nmaps; n++) {
			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
					       BTOBB(byte_cnt), 0);
			ASSERT(bp && !XFS_BUF_GETERROR(bp));
			if (pathlen < byte_cnt) {
				byte_cnt = pathlen;
			}
			pathlen -= byte_cnt;

			memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt);
			cur_chunk += byte_cnt;

			xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
		}
	}

	/*
	 * Create the directory entry for the symlink.
	 */
	error = XFS_DIR_CREATENAME(mp, tp, dp, link_name, link_namelen,
			ip->i_ino, &first_block, &free_list, resblks);
	if (error) {
		goto error1;
	}
	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	/*
	 * Bump the in memory version number of the parent directory
	 * so that other processes accessing it will recognize that
	 * the directory has changed.
	 */
	dp->i_gen++;

	/*
	 * If this is a synchronous mount, make sure that the
	 * symlink transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	/*
	 * xfs_trans_commit normally decrements the vnode ref count
	 * when it unlocks the inode.  Since we want to return the
	 * vnode to the caller, we bump the vnode ref count now.
	 */
	IHOLD(ip);

	error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
	if (error) {
		goto error2;
	}
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	/* Fall through to std_return with error = 0 or errno from
	 * xfs_trans_commit	*/
std_return:
	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
			     DM_EVENT_POSTSYMLINK)) {
		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK,
					dir_vp, DM_RIGHT_NULL,
					error ? NULL : XFS_ITOV(ip),
					DM_RIGHT_NULL, link_name, target_path,
					0, error, 0);
	}

	if (!error) {
		xfs_vnode_t *vp;

		ASSERT(ip);
		vp = XFS_ITOV(ip);
		*vpp = vp;
	}
	return error;

 error2:
	IRELE(ip);
 error1:
	xfs_bmap_cancel(&free_list);
	cancel_flags |= XFS_TRANS_ABORT;
 error_return:
	xfs_trans_cancel(tp, cancel_flags);
	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	if (!dp_joined_to_trans && (dp != NULL)) {
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
	}

	goto std_return;
}


/*
 * xfs_fid2
 *
 * A fid routine that takes a pointer to a previously allocated
 * fid structure (like xfs_fast_fid) but uses a 64 bit inode number.
 */
STATIC int
xfs_fid2(
	bhv_desc_t	*bdp,
	fid_t		*fidp)
{
	xfs_inode_t	*ip;
	xfs_fid2_t	*xfid;

	vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__,
				       (inst_t *)__return_address);
	ASSERT(sizeof(xfs_fid_t) >= sizeof(xfs_fid2_t));

	xfid = (xfs_fid2_t *)fidp;
	ip = XFS_BHVTOI(bdp);
	xfid->fid_len = sizeof(xfs_fid2_t) - sizeof(xfid->fid_len);
	xfid->fid_pad = 0;
	/*
	 * use memcpy because the inode is a long long and there's no
	 * assurance that xfid->fid_ino is properly aligned.
3621 */ 3622 memcpy(&xfid->fid_ino, &ip->i_ino, sizeof(xfid->fid_ino)); 3623 xfid->fid_gen = ip->i_d.di_gen; 3624 3625 return 0; 3626} 3627 3628 3629/* 3630 * xfs_rwlock 3631 */ 3632int 3633xfs_rwlock( 3634 bhv_desc_t *bdp, 3635 vrwlock_t locktype) 3636{ 3637 xfs_inode_t *ip; 3638 xfs_vnode_t *vp; 3639 3640 vp = BHV_TO_VNODE(bdp); 3641 if (VN_ISDIR(vp)) 3642 return 1; 3643 ip = XFS_BHVTOI(bdp); 3644 if (locktype == VRWLOCK_WRITE) { 3645 xfs_ilock(ip, XFS_IOLOCK_EXCL); 3646 } else if (locktype == VRWLOCK_TRY_READ) { 3647 return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED); 3648 } else if (locktype == VRWLOCK_TRY_WRITE) { 3649 return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL); 3650 } else { 3651 ASSERT((locktype == VRWLOCK_READ) || 3652 (locktype == VRWLOCK_WRITE_DIRECT)); 3653 xfs_ilock(ip, XFS_IOLOCK_SHARED); 3654 } 3655 3656 return 1; 3657} 3658 3659 3660/* 3661 * xfs_rwunlock 3662 */ 3663void 3664xfs_rwunlock( 3665 bhv_desc_t *bdp, 3666 vrwlock_t locktype) 3667{ 3668 xfs_inode_t *ip; 3669 xfs_vnode_t *vp; 3670 3671 vp = BHV_TO_VNODE(bdp); 3672 if (VN_ISDIR(vp)) 3673 return; 3674 ip = XFS_BHVTOI(bdp); 3675 if (locktype == VRWLOCK_WRITE) { 3676 /* 3677 * In the write case, we may have added a new entry to 3678 * the reference cache. This might store a pointer to 3679 * an inode to be released in this inode. If it is there, 3680 * clear the pointer and release the inode after unlocking 3681 * this one. 
3682 */ 3683 xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL); 3684 } else { 3685 ASSERT((locktype == VRWLOCK_READ) || 3686 (locktype == VRWLOCK_WRITE_DIRECT)); 3687 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 3688 } 3689 return; 3690} 3691 3692STATIC int 3693xfs_inode_flush( 3694 bhv_desc_t *bdp, 3695 int flags) 3696{ 3697 xfs_inode_t *ip; 3698 xfs_mount_t *mp; 3699 xfs_inode_log_item_t *iip; 3700 int error = 0; 3701 3702 ip = XFS_BHVTOI(bdp); 3703 mp = ip->i_mount; 3704 iip = ip->i_itemp; 3705 3706 if (XFS_FORCED_SHUTDOWN(mp)) 3707 return XFS_ERROR(EIO); 3708 3709 /* 3710 * Bypass inodes which have already been cleaned by 3711 * the inode flush clustering code inside xfs_iflush 3712 */ 3713 if ((ip->i_update_core == 0) && 3714 ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) 3715 return 0; 3716 3717 if (flags & FLUSH_LOG) { 3718 if (iip && iip->ili_last_lsn) { 3719 xlog_t *log = mp->m_log; 3720 xfs_lsn_t sync_lsn; 3721 int s, log_flags = XFS_LOG_FORCE; 3722 3723 s = GRANT_LOCK(log); 3724 sync_lsn = log->l_last_sync_lsn; 3725 GRANT_UNLOCK(log, s); 3726 3727 if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0)) 3728 return 0; 3729 3730 if (flags & FLUSH_SYNC) 3731 log_flags |= XFS_LOG_SYNC; 3732 return xfs_log_force(mp, iip->ili_last_lsn, log_flags); 3733 } 3734 } 3735 3736 /* 3737 * We make this non-blocking if the inode is contended, 3738 * return EAGAIN to indicate to the caller that they 3739 * did not succeed. This prevents the flush path from 3740 * blocking on inodes inside another operation right 3741 * now, they get caught later by xfs_sync. 
3742 */ 3743 if (flags & FLUSH_INODE) { 3744 int flush_flags; 3745 3746 if (xfs_ipincount(ip)) 3747 return EAGAIN; 3748 3749 if (flags & FLUSH_SYNC) { 3750 xfs_ilock(ip, XFS_ILOCK_SHARED); 3751 xfs_iflock(ip); 3752 } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 3753 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { 3754 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3755 return EAGAIN; 3756 } 3757 } else { 3758 return EAGAIN; 3759 } 3760 3761 if (flags & FLUSH_SYNC) 3762 flush_flags = XFS_IFLUSH_SYNC; 3763 else 3764 flush_flags = XFS_IFLUSH_ASYNC; 3765 3766 error = xfs_iflush(ip, flush_flags); 3767 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3768 } 3769 3770 return error; 3771} 3772 3773 3774int 3775xfs_set_dmattrs ( 3776 bhv_desc_t *bdp, 3777 u_int evmask, 3778 u_int16_t state, 3779 cred_t *credp) 3780{ 3781 xfs_inode_t *ip; 3782 xfs_trans_t *tp; 3783 xfs_mount_t *mp; 3784 int error; 3785 3786 if (!capable(CAP_SYS_ADMIN)) 3787 return XFS_ERROR(EPERM); 3788 3789 ip = XFS_BHVTOI(bdp); 3790 mp = ip->i_mount; 3791 3792 if (XFS_FORCED_SHUTDOWN(mp)) 3793 return XFS_ERROR(EIO); 3794 3795 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); 3796 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0); 3797 if (error) { 3798 xfs_trans_cancel(tp, 0); 3799 return error; 3800 } 3801 xfs_ilock(ip, XFS_ILOCK_EXCL); 3802 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3803 3804 ip->i_iocore.io_dmevmask = ip->i_d.di_dmevmask = evmask; 3805 ip->i_iocore.io_dmstate = ip->i_d.di_dmstate = state; 3806 3807 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3808 IHOLD(ip); 3809 error = xfs_trans_commit(tp, 0, NULL); 3810 3811 return error; 3812} 3813 3814 3815/* 3816 * xfs_reclaim 3817 */ 3818STATIC int 3819xfs_reclaim( 3820 bhv_desc_t *bdp) 3821{ 3822 xfs_inode_t *ip; 3823 xfs_vnode_t *vp; 3824 3825 vp = BHV_TO_VNODE(bdp); 3826 ip = XFS_BHVTOI(bdp); 3827 3828 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 3829 3830 ASSERT(!VN_MAPPED(vp)); 3831 3832 /* bad inode, get out here ASAP */ 3833 
if (VN_BAD(vp)) { 3834 xfs_ireclaim(ip); 3835 return 0; 3836 } 3837 3838 vn_iowait(vp); 3839 3840 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); 3841 3842 /* 3843 * Make sure the atime in the XFS inode is correct before freeing the 3844 * Linux inode. 3845 */ 3846 xfs_synchronize_atime(ip); 3847 3848 vnode_destroy_vobject(vp->v_vnode); 3849 3850 /* If we have nothing to flush with this inode then complete the 3851 * teardown now, otherwise break the link between the xfs inode 3852 * and the linux inode and clean up the xfs inode later. This 3853 * avoids flushing the inode to disk during the delete operation 3854 * itself. 3855 */ 3856 if (!ip->i_update_core && (ip->i_itemp == NULL)) { 3857 xfs_ilock(ip, XFS_ILOCK_EXCL); 3858 xfs_iflock(ip); 3859 return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); 3860 } else { 3861 xfs_mount_t *mp = ip->i_mount; 3862 3863 /* Protect sync from us */ 3864 XFS_MOUNT_ILOCK(mp); 3865 vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); 3866 TAILQ_INSERT_TAIL(&mp->m_del_inodes, ip, i_reclaim); 3867 ip->i_flags |= XFS_IRECLAIMABLE; 3868 XFS_MOUNT_IUNLOCK(mp); 3869 } 3870 return 0; 3871} 3872 3873int 3874xfs_finish_reclaim( 3875 xfs_inode_t *ip, 3876 int locked, 3877 int sync_mode) 3878{ 3879 xfs_ihash_t *ih = ip->i_hash; 3880 xfs_vnode_t *vp = XFS_ITOV_NULL(ip); 3881 int error; 3882 3883 if (vp && VN_BAD(vp)) 3884 goto reclaim; 3885 3886 /* The hash lock here protects a thread in xfs_iget_core from 3887 * racing with us on linking the inode back with a vnode. 3888 * Once we have the XFS_IRECLAIM flag set it will not touch 3889 * us. 
3890 */ 3891 write_lock(&ih->ih_lock); 3892 if ((ip->i_flags & XFS_IRECLAIM) || 3893 (!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) { 3894 write_unlock(&ih->ih_lock); 3895 if (locked) { 3896 xfs_ifunlock(ip); 3897 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3898 } 3899 return 1; 3900 } 3901 ip->i_flags |= XFS_IRECLAIM; 3902 write_unlock(&ih->ih_lock); 3903 3904 /* 3905 * If the inode is still dirty, then flush it out. If the inode 3906 * is not in the AIL, then it will be OK to flush it delwri as 3907 * long as xfs_iflush() does not keep any references to the inode. 3908 * We leave that decision up to xfs_iflush() since it has the 3909 * knowledge of whether it's OK to simply do a delwri flush of 3910 * the inode or whether we need to wait until the inode is 3911 * pulled from the AIL. 3912 * We get the flush lock regardless, though, just to make sure 3913 * we don't free it while it is being flushed. 3914 */ 3915 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 3916 if (!locked) { 3917 xfs_ilock(ip, XFS_ILOCK_EXCL); 3918 xfs_iflock(ip); 3919 } 3920 3921 if (ip->i_update_core || 3922 ((ip->i_itemp != NULL) && 3923 (ip->i_itemp->ili_format.ilf_fields != 0))) { 3924 error = xfs_iflush(ip, sync_mode); 3925 /* 3926 * If we hit an error, typically because of filesystem 3927 * shutdown, we don't need to let vn_reclaim to know 3928 * because we're gonna reclaim the inode anyway. 3929 */ 3930 if (error) { 3931 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3932 goto reclaim; 3933 } 3934 xfs_iflock(ip); /* synchronize with xfs_iflush_done */ 3935 } 3936 3937 ASSERT(ip->i_update_core == 0); 3938 ASSERT(ip->i_itemp == NULL || 3939 ip->i_itemp->ili_format.ilf_fields == 0); 3940 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3941 } else if (locked) { 3942 /* 3943 * We are not interested in doing an iflush if we're 3944 * in the process of shutting down the filesystem forcibly. 3945 * So, just reclaim the inode. 
3946 */ 3947 xfs_ifunlock(ip); 3948 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3949 } 3950 3951 reclaim: 3952 xfs_ireclaim(ip); 3953 return 0; 3954} 3955 3956int 3957xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock) 3958{ 3959#ifdef RMC 3960 int purged; 3961 xfs_inode_t *ip, *n; 3962 int done = 0; 3963 3964 while (!done) { 3965 purged = 0; 3966 XFS_MOUNT_ILOCK(mp); 3967 TAILQ_FOREACH_SAFE(curr, &mp->m_del_inodes, i_reclaim, next) { 3968 ip = curr; 3969 if (noblock) { 3970 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) 3971 continue; 3972 if (xfs_ipincount(ip) || 3973 !xfs_iflock_nowait(ip)) { 3974 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3975 continue; 3976 } 3977 } 3978 XFS_MOUNT_IUNLOCK(mp); 3979 if (xfs_finish_reclaim(ip, noblock, 3980 XFS_IFLUSH_DELWRI_ELSE_ASYNC)) 3981 delay(1); 3982 purged = 1; 3983 break; 3984 } 3985 3986 done = !purged; 3987 } 3988 3989 XFS_MOUNT_IUNLOCK(mp); 3990#endif 3991 return 0; 3992} 3993 3994/* 3995 * xfs_alloc_file_space() 3996 * This routine allocates disk space for the given file. 3997 * 3998 * If alloc_type == 0, this request is for an ALLOCSP type 3999 * request which will change the file size. In this case, no 4000 * DMAPI event will be generated by the call. A TRUNCATE event 4001 * will be generated later by xfs_setattr. 4002 * 4003 * If alloc_type != 0, this request is for a RESVSP type 4004 * request, and a DMAPI DM_EVENT_WRITE will be generated if the 4005 * lower block boundary byte address is less than the file's 4006 * length. 
4007 * 4008 * RETURNS: 4009 * 0 on success 4010 * errno on error 4011 * 4012 */ 4013STATIC int 4014xfs_alloc_file_space( 4015 xfs_inode_t *ip, 4016 xfs_off_t offset, 4017 xfs_off_t len, 4018 int alloc_type, 4019 int attr_flags) 4020{ 4021 xfs_mount_t *mp = ip->i_mount; 4022 xfs_off_t count; 4023 xfs_filblks_t allocated_fsb; 4024 xfs_filblks_t allocatesize_fsb; 4025 xfs_extlen_t extsz, temp; 4026 xfs_fileoff_t startoffset_fsb; 4027 xfs_fsblock_t firstfsb; 4028 int nimaps; 4029 int bmapi_flag; 4030 int quota_flag; 4031 int rt; 4032 xfs_trans_t *tp; 4033 xfs_bmbt_irec_t imaps[1], *imapp; 4034 xfs_bmap_free_t free_list; 4035 uint qblocks, resblks, resrtextents; 4036 int committed; 4037 int error; 4038 4039 vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 4040 4041 if (XFS_FORCED_SHUTDOWN(mp)) 4042 return XFS_ERROR(EIO); 4043 4044 rt = XFS_IS_REALTIME_INODE(ip); 4045 if (unlikely(rt)) { 4046 if (!(extsz = ip->i_d.di_extsize)) 4047 extsz = mp->m_sb.sb_rextsize; 4048 } else { 4049 extsz = ip->i_d.di_extsize; 4050 } 4051 4052 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 4053 return error; 4054 4055 if (len <= 0) 4056 return XFS_ERROR(EINVAL); 4057 4058 count = len; 4059 error = 0; 4060 imapp = &imaps[0]; 4061 nimaps = 1; 4062 bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); 4063 startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 4064 allocatesize_fsb = XFS_B_TO_FSB(mp, count); 4065 4066 /* Generate a DMAPI event if needed. 
*/ 4067 if (alloc_type != 0 && offset < ip->i_d.di_size && 4068 (attr_flags&ATTR_DMI) == 0 && 4069 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { 4070 xfs_off_t end_dmi_offset; 4071 4072 end_dmi_offset = offset+len; 4073 if (end_dmi_offset > ip->i_d.di_size) 4074 end_dmi_offset = ip->i_d.di_size; 4075 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), 4076 offset, end_dmi_offset - offset, 4077 0, NULL); 4078 if (error) 4079 return error; 4080 } 4081 4082 /* 4083 * Allocate file space until done or until there is an error 4084 */ 4085retry: 4086 while (allocatesize_fsb && !error) { 4087 xfs_fileoff_t s, e; 4088 4089 /* 4090 * Determine space reservations for data/realtime. 4091 */ 4092 if (unlikely(extsz)) { 4093 s = startoffset_fsb; 4094 do_div(s, extsz); 4095 s *= extsz; 4096 e = startoffset_fsb + allocatesize_fsb; 4097 if ((temp = do_mod(startoffset_fsb, extsz))) 4098 e += temp; 4099 if ((temp = do_mod(e, extsz))) 4100 e += extsz - temp; 4101 } else { 4102 s = 0; 4103 e = allocatesize_fsb; 4104 } 4105 4106 if (unlikely(rt)) { 4107 resrtextents = qblocks = (uint)(e - s); 4108 resrtextents /= mp->m_sb.sb_rextsize; 4109 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 4110 quota_flag = XFS_QMOPT_RES_RTBLKS; 4111 } else { 4112 resrtextents = 0; 4113 resblks = qblocks = \ 4114 XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s)); 4115 quota_flag = XFS_QMOPT_RES_REGBLKS; 4116 } 4117 4118 /* 4119 * Allocate and setup the transaction. 4120 */ 4121 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 4122 error = xfs_trans_reserve(tp, resblks, 4123 XFS_WRITE_LOG_RES(mp), resrtextents, 4124 XFS_TRANS_PERM_LOG_RES, 4125 XFS_WRITE_LOG_COUNT); 4126 /* 4127 * Check for running out of space 4128 */ 4129 if (error) { 4130 /* 4131 * Free the transaction structure. 
4132 */ 4133 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 4134 xfs_trans_cancel(tp, 0); 4135 break; 4136 } 4137 xfs_ilock(ip, XFS_ILOCK_EXCL); 4138 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, 4139 qblocks, 0, quota_flag); 4140 if (error) 4141 goto error1; 4142 4143 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 4144 xfs_trans_ihold(tp, ip); 4145 4146 /* 4147 * Issue the xfs_bmapi() call to allocate the blocks 4148 */ 4149 XFS_BMAP_INIT(&free_list, &firstfsb); 4150 error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, 4151 allocatesize_fsb, bmapi_flag, 4152 &firstfsb, 0, imapp, &nimaps, 4153 &free_list, NULL); 4154 if (error) { 4155 goto error0; 4156 } 4157 4158 /* 4159 * Complete the transaction 4160 */ 4161 error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); 4162 if (error) { 4163 goto error0; 4164 } 4165 4166 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 4167 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4168 if (error) { 4169 break; 4170 } 4171 4172 allocated_fsb = imapp->br_blockcount; 4173 4174 if (nimaps == 0) { 4175 error = XFS_ERROR(ENOSPC); 4176 break; 4177 } 4178 4179 startoffset_fsb += allocated_fsb; 4180 allocatesize_fsb -= allocated_fsb; 4181 } 4182dmapi_enospc_check: 4183 if (error == ENOSPC && (attr_flags&ATTR_DMI) == 0 && 4184 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_NOSPACE)) { 4185 4186 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, 4187 XFS_ITOV(ip), DM_RIGHT_NULL, 4188 XFS_ITOV(ip), DM_RIGHT_NULL, 4189 NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ 4190 if (error == 0) 4191 goto retry; /* Maybe DMAPI app. 
has made space */ 4192 /* else fall through with error from XFS_SEND_DATA */ 4193 } 4194 4195 return error; 4196 4197error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 4198 xfs_bmap_cancel(&free_list); 4199 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); 4200 4201error1: /* Just cancel transaction */ 4202 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 4203 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4204 goto dmapi_enospc_check; 4205} 4206 4207/* 4208 * Zero file bytes between startoff and endoff inclusive. 4209 * The iolock is held exclusive and no blocks are buffered. 4210 */ 4211STATIC int 4212xfs_zero_remaining_bytes( 4213 xfs_inode_t *ip, 4214 xfs_off_t startoff, 4215 xfs_off_t endoff) 4216{ 4217 xfs_bmbt_irec_t imap; 4218 xfs_fileoff_t offset_fsb; 4219 xfs_off_t lastoffset; 4220 xfs_off_t offset; 4221 xfs_buf_t *bp; 4222 xfs_mount_t *mp = ip->i_mount; 4223 int nimap; 4224 int error = 0; 4225 4226 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, 4227 ip->i_d.di_flags & XFS_DIFLAG_REALTIME ? 
4228 mp->m_rtdev_targp : mp->m_ddev_targp); 4229 4230 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { 4231 offset_fsb = XFS_B_TO_FSBT(mp, offset); 4232 nimap = 1; 4233 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0, 4234 NULL, 0, &imap, &nimap, NULL, NULL); 4235 if (error || nimap < 1) 4236 break; 4237 ASSERT(imap.br_blockcount >= 1); 4238 ASSERT(imap.br_startoff == offset_fsb); 4239 lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; 4240 if (lastoffset > endoff) 4241 lastoffset = endoff; 4242 if (imap.br_startblock == HOLESTARTBLOCK) 4243 continue; 4244 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 4245 if (imap.br_state == XFS_EXT_UNWRITTEN) 4246 continue; 4247 XFS_BUF_UNDONE(bp); 4248 XFS_BUF_UNWRITE(bp); 4249 XFS_BUF_READ(bp); 4250 XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock)); 4251 xfsbdstrat(mp, bp); 4252 if ((error = xfs_iowait(bp))) { 4253 xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", 4254 mp, bp, XFS_BUF_ADDR(bp)); 4255 break; 4256 } 4257 memset(XFS_BUF_PTR(bp) + 4258 (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), 4259 0, lastoffset - offset + 1); 4260 XFS_BUF_UNDONE(bp); 4261 XFS_BUF_UNREAD(bp); 4262 XFS_BUF_WRITE(bp); 4263 xfsbdstrat(mp, bp); 4264 if ((error = xfs_iowait(bp))) { 4265 xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", 4266 mp, bp, XFS_BUF_ADDR(bp)); 4267 break; 4268 } 4269 } 4270 xfs_buf_free(bp); 4271 return error; 4272} 4273 4274/* 4275 * xfs_free_file_space() 4276 * This routine frees disk space for the given file. 4277 * 4278 * This routine is only called by xfs_change_file_space 4279 * for an UNRESVSP type call. 
4280 * 4281 * RETURNS: 4282 * 0 on success 4283 * errno on error 4284 * 4285 */ 4286STATIC int 4287xfs_free_file_space( 4288 xfs_inode_t *ip, 4289 xfs_off_t offset, 4290 xfs_off_t len, 4291 int attr_flags) 4292{ 4293 xfs_vnode_t *vp; 4294 int committed; 4295 int done; 4296 xfs_off_t end_dmi_offset; 4297 xfs_fileoff_t endoffset_fsb; 4298 int error; 4299 xfs_fsblock_t firstfsb; 4300 xfs_bmap_free_t free_list; 4301 xfs_off_t ilen; 4302 xfs_bmbt_irec_t imap; 4303 xfs_off_t ioffset; 4304 xfs_extlen_t mod=0; 4305 xfs_mount_t *mp; 4306 int nimap; 4307 uint resblks; 4308 int rounding; 4309 int rt; 4310 xfs_fileoff_t startoffset_fsb; 4311 xfs_trans_t *tp; 4312 int need_iolock = 1; 4313 4314 vp = XFS_ITOV(ip); 4315 mp = ip->i_mount; 4316 4317 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 4318 4319 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 4320 return error; 4321 4322 error = 0; 4323 if (len <= 0) /* if nothing being freed */ 4324 return error; 4325 rt = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME); 4326 startoffset_fsb = XFS_B_TO_FSB(mp, offset); 4327 end_dmi_offset = offset + len; 4328 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); 4329 4330 if (offset < ip->i_d.di_size && 4331 (attr_flags & ATTR_DMI) == 0 && 4332 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { 4333 if (end_dmi_offset > ip->i_d.di_size) 4334 end_dmi_offset = ip->i_d.di_size; 4335 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, 4336 offset, end_dmi_offset - offset, 4337 AT_DELAY_FLAG(attr_flags), NULL); 4338 if (error) 4339 return error; 4340 } 4341 4342 ASSERT(attr_flags & ATTR_NOLOCK ? 
attr_flags & ATTR_DMI : 1); 4343 if (attr_flags & ATTR_NOLOCK) 4344 need_iolock = 0; 4345 if (need_iolock) { 4346 xfs_ilock(ip, XFS_IOLOCK_EXCL); 4347 vn_iowait(vp); /* wait for the completion of any pending DIOs */ 4348 } 4349 4350 rounding = MAX((__uint8_t)(1 << mp->m_sb.sb_blocklog), 4351 (__uint8_t)NBPP); 4352 ilen = len + (offset & (rounding - 1)); 4353 ioffset = offset & ~(rounding - 1); 4354 if (ilen & (rounding - 1)) 4355 ilen = (ilen + rounding) & ~(rounding - 1); 4356 4357 if (VN_CACHED(vp) != 0) { 4358 xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, 4359 ctooff(offtoct(ioffset)), -1); 4360 XVOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(ioffset)), 4361 -1, FI_REMAPF_LOCKED); 4362 } 4363 4364 /* 4365 * Need to zero the stuff we're not freeing, on disk. 4366 * If its a realtime file & can't use unwritten extents then we 4367 * actually need to zero the extent edges. Otherwise xfs_bunmapi 4368 * will take care of it for us. 4369 */ 4370 if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { 4371 nimap = 1; 4372 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb, 4373 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 4374 if (error) 4375 goto out_unlock_iolock; 4376 ASSERT(nimap == 0 || nimap == 1); 4377 if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 4378 xfs_daddr_t block; 4379 4380 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 4381 block = imap.br_startblock; 4382 mod = do_div(block, mp->m_sb.sb_rextsize); 4383 if (mod) 4384 startoffset_fsb += mp->m_sb.sb_rextsize - mod; 4385 } 4386 nimap = 1; 4387 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1, 4388 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 4389 if (error) 4390 goto out_unlock_iolock; 4391 ASSERT(nimap == 0 || nimap == 1); 4392 if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 4393 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 4394 mod++; 4395 if (mod && (mod != mp->m_sb.sb_rextsize)) 4396 endoffset_fsb -= mod; 4397 } 4398 } 4399 if ((done = (endoffset_fsb <= startoffset_fsb))) 4400 
/* 4401 * One contiguous piece to clear 4402 */ 4403 error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1); 4404 else { 4405 /* 4406 * Some full blocks, possibly two pieces to clear 4407 */ 4408 if (offset < XFS_FSB_TO_B(mp, startoffset_fsb)) 4409 error = xfs_zero_remaining_bytes(ip, offset, 4410 XFS_FSB_TO_B(mp, startoffset_fsb) - 1); 4411 if (!error && 4412 XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len) 4413 error = xfs_zero_remaining_bytes(ip, 4414 XFS_FSB_TO_B(mp, endoffset_fsb), 4415 offset + len - 1); 4416 } 4417 4418 /* 4419 * free file space until done or until there is an error 4420 */ 4421 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 4422 while (!error && !done) { 4423 4424 /* 4425 * allocate and setup the transaction 4426 */ 4427 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 4428 error = xfs_trans_reserve(tp, 4429 resblks, 4430 XFS_WRITE_LOG_RES(mp), 4431 0, 4432 XFS_TRANS_PERM_LOG_RES, 4433 XFS_WRITE_LOG_COUNT); 4434 4435 /* 4436 * check for running out of space 4437 */ 4438 if (error) { 4439 /* 4440 * Free the transaction structure. 
4441 */ 4442 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 4443 xfs_trans_cancel(tp, 0); 4444 break; 4445 } 4446 xfs_ilock(ip, XFS_ILOCK_EXCL); 4447 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, 4448 ip->i_udquot, ip->i_gdquot, resblks, 0, 4449 XFS_QMOPT_RES_REGBLKS); 4450 if (error) 4451 goto error1; 4452 4453 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 4454 xfs_trans_ihold(tp, ip); 4455 4456 /* 4457 * issue the bunmapi() call to free the blocks 4458 */ 4459 XFS_BMAP_INIT(&free_list, &firstfsb); 4460 error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, 4461 endoffset_fsb - startoffset_fsb, 4462 0, 2, &firstfsb, &free_list, NULL, &done); 4463 if (error) { 4464 goto error0; 4465 } 4466 4467 /* 4468 * complete the transaction 4469 */ 4470 error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); 4471 if (error) { 4472 goto error0; 4473 } 4474 4475 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 4476 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4477 } 4478 4479 out_unlock_iolock: 4480 if (need_iolock) 4481 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 4482 return error; 4483 4484 error0: 4485 xfs_bmap_cancel(&free_list); 4486 error1: 4487 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 4488 xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) : 4489 XFS_ILOCK_EXCL); 4490 return error; 4491} 4492 4493/* 4494 * xfs_change_file_space() 4495 * This routine allocates or frees disk space for the given file. 4496 * The user specified parameters are checked for alignment and size 4497 * limitations. 
4498 * 4499 * RETURNS: 4500 * 0 on success 4501 * errno on error 4502 * 4503 */ 4504int 4505xfs_change_file_space( 4506 bhv_desc_t *bdp, 4507 int cmd, 4508 xfs_flock64_t *bf, 4509 xfs_off_t offset, 4510 cred_t *credp, 4511 int attr_flags) 4512{ 4513 int clrprealloc; 4514 int error; 4515 xfs_fsize_t fsize; 4516 xfs_inode_t *ip; 4517 xfs_mount_t *mp; 4518 int setprealloc; 4519 xfs_off_t startoffset; 4520 xfs_off_t llen; 4521 xfs_trans_t *tp; 4522 xfs_vattr_t va; 4523 xfs_vnode_t *vp; 4524 4525 vp = BHV_TO_VNODE(bdp); 4526 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 4527 4528 ip = XFS_BHVTOI(bdp); 4529 mp = ip->i_mount; 4530 4531 /* 4532 * must be a regular file and have write permission 4533 */ 4534 if (!VN_ISREG(vp)) 4535 return XFS_ERROR(EINVAL); 4536 4537 xfs_ilock(ip, XFS_ILOCK_SHARED); 4538 4539 if ((error = xfs_iaccess(ip, S_IWUSR, credp))) { 4540 xfs_iunlock(ip, XFS_ILOCK_SHARED); 4541 return error; 4542 } 4543 4544 xfs_iunlock(ip, XFS_ILOCK_SHARED); 4545 4546 switch (bf->l_whence) { 4547 case 0: /*SEEK_SET*/ 4548 break; 4549 case 1: /*SEEK_CUR*/ 4550 bf->l_start += offset; 4551 break; 4552 case 2: /*SEEK_END*/ 4553 bf->l_start += ip->i_d.di_size; 4554 break; 4555 default: 4556 return XFS_ERROR(EINVAL); 4557 } 4558 4559 llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len; 4560 4561 if ( (bf->l_start < 0) 4562 || (bf->l_start > XFS_MAXIOFFSET(mp)) 4563 || (bf->l_start + llen < 0) 4564 || (bf->l_start + llen > XFS_MAXIOFFSET(mp))) 4565 return XFS_ERROR(EINVAL); 4566 4567 bf->l_whence = 0; 4568 4569 startoffset = bf->l_start; 4570 fsize = ip->i_d.di_size; 4571 4572 /* 4573 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve 4574 * file space. 4575 * These calls do NOT zero the data space allocated to the file, 4576 * nor do they change the file size. 4577 * 4578 * XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file 4579 * space. 4580 * These calls cause the new file data to be zeroed and the file 4581 * size to be changed. 
4582 */ 4583 setprealloc = clrprealloc = 0; 4584 4585 switch (cmd) { 4586 case XFS_IOC_RESVSP: 4587 case XFS_IOC_RESVSP64: 4588 error = xfs_alloc_file_space(ip, startoffset, bf->l_len, 4589 1, attr_flags); 4590 if (error) 4591 return error; 4592 setprealloc = 1; 4593 break; 4594 4595 case XFS_IOC_UNRESVSP: 4596 case XFS_IOC_UNRESVSP64: 4597 if ((error = xfs_free_file_space(ip, startoffset, bf->l_len, 4598 attr_flags))) 4599 return error; 4600 break; 4601 4602 case XFS_IOC_ALLOCSP: 4603 case XFS_IOC_ALLOCSP64: 4604 case XFS_IOC_FREESP: 4605 case XFS_IOC_FREESP64: 4606 if (startoffset > fsize) { 4607 error = xfs_alloc_file_space(ip, fsize, 4608 startoffset - fsize, 0, attr_flags); 4609 if (error) 4610 break; 4611 } 4612 4613 va.va_mask = XFS_AT_SIZE; 4614 va.va_size = startoffset; 4615 4616 error = xfs_setattr(bdp, &va, attr_flags, credp); 4617 4618 if (error) 4619 return error; 4620 4621 clrprealloc = 1; 4622 break; 4623 4624 default: 4625 ASSERT(0); 4626 return XFS_ERROR(EINVAL); 4627 } 4628 4629 /* 4630 * update the inode timestamp, mode, and prealloc flag bits 4631 */ 4632 tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); 4633 4634 if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp), 4635 0, 0, 0))) { 4636 /* ASSERT(0); */ 4637 xfs_trans_cancel(tp, 0); 4638 return error; 4639 } 4640 4641 xfs_ilock(ip, XFS_ILOCK_EXCL); 4642 4643 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 4644 xfs_trans_ihold(tp, ip); 4645 4646 if ((attr_flags & ATTR_DMI) == 0) { 4647 ip->i_d.di_mode &= ~S_ISUID; 4648 4649 /* 4650 * Note that we don't have to worry about mandatory 4651 * file locking being disabled here because we only 4652 * clear the S_ISGID bit if the Group execute bit is 4653 * on, but if it was on then mandatory locking wouldn't 4654 * have been enabled. 
4655 */ 4656 if (ip->i_d.di_mode & S_IXGRP) 4657 ip->i_d.di_mode &= ~S_ISGID; 4658 4659 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 4660 } 4661 if (setprealloc) 4662 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; 4663 else if (clrprealloc) 4664 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; 4665 4666 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 4667 xfs_trans_set_sync(tp); 4668 4669 error = xfs_trans_commit(tp, 0, NULL); 4670 4671 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4672 4673 return error; 4674} 4675 4676 4677xfs_vnodeops_t xfs_vnodeops = { 4678 BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS), 4679 .vop_open = xfs_open, 4680 .vop_read = xfs_read, 4681#ifdef HAVE_SENDFILE 4682 .vop_sendfile = xfs_sendfile, 4683#endif 4684 .vop_write = xfs_write, 4685 .vop_ioctl = xfs_ioctl, 4686 .vop_getattr = xfs_getattr, 4687 .vop_setattr = xfs_setattr, 4688 .vop_access = xfs_access, 4689 .vop_lookup = xfs_lookup, 4690 .vop_create = xfs_create, 4691 .vop_remove = xfs_remove, 4692 .vop_link = xfs_link, 4693 .vop_rename = xfs_rename, 4694 .vop_mkdir = xfs_mkdir, 4695 .vop_rmdir = xfs_rmdir, 4696 .vop_readdir = xfs_readdir, 4697 .vop_symlink = xfs_symlink, 4698 .vop_readlink = xfs_readlink, 4699 .vop_fsync = xfs_fsync, 4700 .vop_inactive = xfs_inactive, 4701 .vop_fid2 = xfs_fid2, 4702 .vop_rwlock = xfs_rwlock, 4703 .vop_rwunlock = xfs_rwunlock, 4704 .vop_bmap = xfs_bmap, 4705 .vop_reclaim = xfs_reclaim, 4706 .vop_attr_get = xfs_attr_get, 4707 .vop_attr_set = xfs_attr_set, 4708 .vop_attr_remove = xfs_attr_remove, 4709 .vop_attr_list = xfs_attr_list, 4710 .vop_link_removed = (xfs_vop_link_removed_t)fs_noval, 4711 .vop_vnode_change = (xfs_vop_vnode_change_t)fs_noval, 4712 .vop_tosspages = fs_tosspages, 4713 .vop_flushinval_pages = fs_flushinval_pages, 4714 .vop_flush_pages = fs_flush_pages, 4715 .vop_release = xfs_release, 4716 .vop_iflush = xfs_inode_flush, 4717}; 4718