1/* 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#include "xfs.h" 20#include "xfs_fs.h" 21#include "xfs_types.h" 22#include "xfs_bit.h" 23#include "xfs_log.h" 24#include "xfs_inum.h" 25#include "xfs_trans.h" 26#include "xfs_sb.h" 27#include "xfs_ag.h" 28#include "xfs_dir.h" 29#include "xfs_dir2.h" 30#include "xfs_dmapi.h" 31#include "xfs_mount.h" 32#include "xfs_da_btree.h" 33#include "xfs_bmap_btree.h" 34#include "xfs_alloc_btree.h" 35#include "xfs_ialloc_btree.h" 36#include "xfs_dir_sf.h" 37#include "xfs_dir2_sf.h" 38#include "xfs_attr_sf.h" 39#include "xfs_dinode.h" 40#include "xfs_inode.h" 41#include "xfs_inode_item.h" 42#include "xfs_dir_leaf.h" 43#include "xfs_itable.h" 44#include "xfs_btree.h" 45#include "xfs_ialloc.h" 46#include "xfs_alloc.h" 47#include "xfs_bmap.h" 48#include "xfs_attr.h" 49#include "xfs_rw.h" 50#include "xfs_error.h" 51#include "xfs_quota.h" 52#include "xfs_utils.h" 53#include "xfs_rtalloc.h" 54#include "xfs_refcache.h" 55#include "xfs_trans_space.h" 56#include "xfs_log_priv.h" 57#include "xfs_mac.h" 58 59#include "xfs_fs.h" 60 61/* 62 * The maximum pathlen is 1024 bytes. Since the minimum file system 63 * blocksize is 512 bytes, we can get a max of 2 extents back from 64 * bmapi. 65 */ 66#define SYMLINK_MAPS 2 67 68/* 69 * For xfs, we check that the file isn't too big to be opened by this kernel. 70 * No other open action is required for regular files. Devices are handled 71 * through the specfs file system, pipes through fifofs. Device and 72 * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively, 73 * when a new vnode is first looked up or created. 74 */ 75STATIC int 76xfs_open( 77 bhv_desc_t *bdp, 78 cred_t *credp) 79{ 80 int mode; 81 xfs_vnode_t *vp; 82 xfs_inode_t *ip; 83 84 vp = BHV_TO_VNODE(bdp); 85 ip = XFS_BHVTOI(bdp); 86 87 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 88 return XFS_ERROR(EIO); 89 90 /* 91 * If it's a directory with any blocks, read-ahead block 0 92 * as we're almost certain to have the next operation be a read there. 93 */ 94 if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) { 95 mode = xfs_ilock_map_shared(ip); 96 if (ip->i_d.di_nextents > 0) 97 (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); 98 xfs_iunlock(ip, mode); 99 } 100 return 0; 101} 102 103 104/* 105 * xfs_getattr 106 */ 107STATIC int 108xfs_getattr( 109 bhv_desc_t *bdp, 110 xfs_vattr_t *vap, 111 int flags, 112 cred_t *credp) 113{ 114 xfs_inode_t *ip; 115 xfs_mount_t *mp; 116 xfs_vnode_t *vp; 117 118 vp = BHV_TO_VNODE(bdp); 119 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 120 121 ip = XFS_BHVTOI(bdp); 122 mp = ip->i_mount; 123 124 if (XFS_FORCED_SHUTDOWN(mp)) 125 return XFS_ERROR(EIO); 126 127 if (!(flags & ATTR_LAZY)) 128 xfs_ilock(ip, XFS_ILOCK_SHARED); 129 130 vap->va_size = ip->i_d.di_size; 131 if (vap->va_mask == XFS_AT_SIZE) 132 goto all_done; 133 134 vap->va_nblocks = 135 XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); 136 vap->va_nodeid = ip->i_ino; 137#if XFS_BIG_INUMS 138 vap->va_nodeid += mp->m_inoadd; 139#endif 140 vap->va_nlink = ip->i_d.di_nlink; 141 142 /* 143 * Quick exit for non-stat callers 144 */ 145 if ((vap->va_mask & 146 ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID| 147 XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0) 148 goto all_done; 149 150 /* 151 * Copy from in-core inode. 152 */ 153 vap->va_mode = ip->i_d.di_mode; 154 vap->va_uid = ip->i_d.di_uid; 155 vap->va_gid = ip->i_d.di_gid; 156 vap->va_projid = ip->i_d.di_projid; 157 158 /* 159 * Check vnode type block/char vs. everything else. 160 */ 161 switch (ip->i_d.di_mode & S_IFMT) { 162 case S_IFBLK: 163 case S_IFCHR: 164 vap->va_rdev = ip->i_df.if_u2.if_rdev; 165 vap->va_blocksize = BLKDEV_IOSIZE; 166 break; 167 default: 168 vap->va_rdev = 0; 169 170 if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { 171 vap->va_blocksize = xfs_preferred_iosize(mp); 172 } else { 173 174 /* 175 * If the file blocks are being allocated from a 176 * realtime partition, then return the inode's 177 * realtime extent size or the realtime volume's 178 * extent size. 179 */ 180 vap->va_blocksize = ip->i_d.di_extsize ? 181 (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : 182 (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog); 183 } 184 break; 185 } 186 187 vn_atime_to_timespec(vp, &vap->va_atime); 188 vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec; 189 vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; 190 vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec; 191 vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; 192 193 /* 194 * Exit for stat callers. See if any of the rest of the fields 195 * to be filled in are needed. 196 */ 197 if ((vap->va_mask & 198 (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS| 199 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0) 200 goto all_done; 201 202 /* 203 * Convert di_flags to xflags. 204 */ 205 vap->va_xflags = xfs_ip2xflags(ip); 206 207 /* 208 * Exit for inode revalidate. See if any of the rest of 209 * the fields to be filled in are needed. 210 */ 211 if ((vap->va_mask & 212 (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS| 213 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0) 214 goto all_done; 215 216 vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog; 217 vap->va_nextents = 218 (ip->i_df.if_flags & XFS_IFEXTENTS) ? 219 ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) : 220 ip->i_d.di_nextents; 221 if (ip->i_afp) 222 vap->va_anextents = 223 (ip->i_afp->if_flags & XFS_IFEXTENTS) ? 224 ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) : 225 ip->i_d.di_anextents; 226 else 227 vap->va_anextents = 0; 228 vap->va_gen = ip->i_d.di_gen; 229 230 all_done: 231 if (!(flags & ATTR_LAZY)) 232 xfs_iunlock(ip, XFS_ILOCK_SHARED); 233 return 0; 234} 235 236 237/* 238 * xfs_setattr 239 */ 240int 241xfs_setattr( 242 bhv_desc_t *bdp, 243 xfs_vattr_t *vap, 244 int flags, 245 cred_t *credp) 246{ 247 xfs_inode_t *ip; 248 xfs_trans_t *tp; 249 xfs_mount_t *mp; 250 int mask; 251 int code; 252 uint lock_flags; 253 uint commit_flags=0; 254 uid_t uid=0, iuid=0; 255 gid_t gid=0, igid=0; 256 int timeflags = 0; 257 xfs_vnode_t *vp; 258 xfs_prid_t projid=0, iprojid=0; 259 int mandlock_before, mandlock_after; 260 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 261 int file_owner; 262 int need_iolock = 1; 263 264 vp = BHV_TO_VNODE(bdp); 265 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 266 267 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 268 return XFS_ERROR(EROFS); 269 270 /* 271 * Cannot set certain attributes. 272 */ 273 mask = vap->va_mask; 274 if (mask & XFS_AT_NOSET) { 275 return XFS_ERROR(EINVAL); 276 } 277 278 ip = XFS_BHVTOI(bdp); 279 mp = ip->i_mount; 280 281 if (XFS_FORCED_SHUTDOWN(mp)) 282 return XFS_ERROR(EIO); 283 284 /* 285 * Timestamps do not need to be logged and hence do not 286 * need to be done within a transaction. 287 */ 288 if (mask & XFS_AT_UPDTIMES) { 289 ASSERT((mask & ~XFS_AT_UPDTIMES) == 0); 290 timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) | 291 ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) | 292 ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0); 293 xfs_ichgtime(ip, timeflags); 294 return 0; 295 } 296 297 olddquot1 = olddquot2 = NULL; 298 udqp = gdqp = NULL; 299 300 /* 301 * If disk quotas is on, we make sure that the dquots do exist on disk, 302 * before we start any other transactions. Trying to do this later 303 * is messy. We don't care to take a readlock to look at the ids 304 * in inode here, because we can't hold it across the trans_reserve. 305 * If the IDs do change before we take the ilock, we're covered 306 * because the i_*dquot fields will get updated anyway. 307 */ 308 if (XFS_IS_QUOTA_ON(mp) && 309 (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) { 310 uint qflags = 0; 311 312 if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { 313 uid = vap->va_uid; 314 qflags |= XFS_QMOPT_UQUOTA; 315 } else { 316 uid = ip->i_d.di_uid; 317 } 318 if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) { 319 gid = vap->va_gid; 320 qflags |= XFS_QMOPT_GQUOTA; 321 } else { 322 gid = ip->i_d.di_gid; 323 } 324 if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) { 325 projid = vap->va_projid; 326 qflags |= XFS_QMOPT_PQUOTA; 327 } else { 328 projid = ip->i_d.di_projid; 329 } 330 /* 331 * We take a reference when we initialize udqp and gdqp, 332 * so it is important that we never blindly double trip on 333 * the same variable. See xfs_create() for an example. 334 */ 335 ASSERT(udqp == NULL); 336 ASSERT(gdqp == NULL); 337 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags, 338 &udqp, &gdqp); 339 if (code) 340 return code; 341 } 342 343 /* 344 * For the other attributes, we acquire the inode lock and 345 * first do an error checking pass. 346 */ 347 tp = NULL; 348 lock_flags = XFS_ILOCK_EXCL; 349 ASSERT(flags & ATTR_NOLOCK ? flags & ATTR_DMI : 1); 350 if (flags & ATTR_NOLOCK) 351 need_iolock = 0; 352 if (!(mask & XFS_AT_SIZE)) { 353 if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) || 354 (mp->m_flags & XFS_MOUNT_WSYNC)) { 355 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 356 commit_flags = 0; 357 if ((code = xfs_trans_reserve(tp, 0, 358 XFS_ICHANGE_LOG_RES(mp), 0, 359 0, 0))) { 360 lock_flags = 0; 361 goto error_return; 362 } 363 } 364 } else { 365 if (DM_EVENT_ENABLED (vp->v_vfsp, ip, DM_EVENT_TRUNCATE) && 366 !(flags & ATTR_DMI)) { 367 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; 368 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp, 369 vap->va_size, 0, dmflags, NULL); 370 if (code) { 371 lock_flags = 0; 372 goto error_return; 373 } 374 } 375 if (need_iolock) 376 lock_flags |= XFS_IOLOCK_EXCL; 377 } 378 379 xfs_ilock(ip, lock_flags); 380 381 /* boolean: are we the file owner? */ 382#if 0 383 file_owner = (current_fsuid(credp) == ip->i_d.di_uid); 384#else 385 file_owner = (credp->cr_uid == ip->i_d.di_uid); 386#endif 387 388 /* 389 * Change various properties of a file. 390 * Only the owner or users with CAP_FOWNER 391 * capability may do these things. 392 */ 393 if (mask & 394 (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID| 395 XFS_AT_GID|XFS_AT_PROJID)) { 396 /* 397 * CAP_FOWNER overrides the following restrictions: 398 * 399 * The user ID of the calling process must be equal 400 * to the file owner ID, except in cases where the 401 * CAP_FSETID capability is applicable. 402 */ 403 if (!file_owner && !capable(CAP_FOWNER)) { 404 code = XFS_ERROR(EPERM); 405 goto error_return; 406 } 407 408 /* 409 * CAP_FSETID overrides the following restrictions: 410 * 411 * The effective user ID of the calling process shall match 412 * the file owner when setting the set-user-ID and 413 * set-group-ID bits on that file. 414 * 415 * The effective group ID or one of the supplementary group 416 * IDs of the calling process shall match the group owner of 417 * the file when setting the set-group-ID bit on that file 418 */ 419 if (mask & XFS_AT_MODE) { 420 mode_t m = 0; 421 422 if ((vap->va_mode & S_ISUID) && !file_owner) 423 m |= S_ISUID; 424 if ((vap->va_mode & S_ISGID) && 425 !groupmember((gid_t)ip->i_d.di_gid, credp)) 426 m |= S_ISGID; 427#if 1 428 /* Linux allows this, Irix doesn't. */ 429 if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp)) 430 m |= S_ISVTX; 431#endif 432 if (m && !capable(CAP_FSETID)) 433 vap->va_mode &= ~m; 434 } 435 } 436 437 /* 438 * Change file ownership. Must be the owner or privileged. 439 * If the system was configured with the "restricted_chown" 440 * option, the owner is not permitted to give away the file, 441 * and can change the group id only to a group of which he 442 * or she is a member. 443 */ 444 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 445 /* 446 * These IDs could have changed since we last looked at them. 447 * But, we're assured that if the ownership did change 448 * while we didn't have the inode locked, inode's dquot(s) 449 * would have changed also. 450 */ 451 iuid = ip->i_d.di_uid; 452 iprojid = ip->i_d.di_projid; 453 igid = ip->i_d.di_gid; 454 gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; 455 uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid; 456 457 projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid : 458 iprojid; 459 460 /* 461 * CAP_CHOWN overrides the following restrictions: 462 * 463 * If _POSIX_CHOWN_RESTRICTED is defined, this capability 464 * shall override the restriction that a process cannot 465 * change the user ID of a file it owns and the restriction 466 * that the group ID supplied to the chown() function 467 * shall be equal to either the group ID or one of the 468 * supplementary group IDs of the calling process. 469 */ 470 if (restricted_chown && 471 (iuid != uid || (igid != gid && 472 !groupmember((gid_t)gid, credp))) && 473 !capable(CAP_CHOWN)) { 474 code = XFS_ERROR(EPERM); 475 goto error_return; 476 } 477 /* 478 * Do a quota reservation only if uid/projid/gid is actually 479 * going to change. 480 */ 481 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || 482 (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) || 483 (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { 484 ASSERT(tp); 485 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, 486 capable(CAP_FOWNER) ? 487 XFS_QMOPT_FORCE_RES : 0); 488 if (code) /* out of quota */ 489 goto error_return; 490 } 491 } 492 493 /* 494 * Truncate file. Must have write permission and not be a directory. 495 */ 496 if (mask & XFS_AT_SIZE) { 497 /* Short circuit the truncate case for zero length files */ 498 if ((vap->va_size == 0) && 499 (ip->i_d.di_size == 0) && (ip->i_d.di_nextents == 0)) { 500 xfs_iunlock(ip, XFS_ILOCK_EXCL); 501 lock_flags &= ~XFS_ILOCK_EXCL; 502 if (mask & XFS_AT_CTIME) 503 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 504 code = 0; 505 goto error_return; 506 } 507 508 if (VN_ISDIR(vp)) { 509 code = XFS_ERROR(EISDIR); 510 goto error_return; 511 } else if (!VN_ISREG(vp)) { 512 code = XFS_ERROR(EINVAL); 513 goto error_return; 514 } 515 /* 516 * Make sure that the dquots are attached to the inode. 517 */ 518 if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED))) 519 goto error_return; 520 } 521 522 /* 523 * Change file access or modified times. 524 */ 525 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 526 if (!file_owner) { 527 if ((flags & ATTR_UTIME) && 528 !capable(CAP_FOWNER)) { 529 code = XFS_ERROR(EPERM); 530 goto error_return; 531 } 532 } 533 } 534 535 /* 536 * Change extent size or realtime flag. 537 */ 538 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) { 539 /* 540 * Can't change extent size if any extents are allocated. 541 */ 542 if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) && 543 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != 544 vap->va_extsize) ) { 545 code = XFS_ERROR(EINVAL); /* EFBIG? */ 546 goto error_return; 547 } 548 /* 549 * Can't change realtime flag if any extents are allocated. 550 */ 551 if ((ip->i_d.di_nextents || ip->i_delayed_blks) && 552 (mask & XFS_AT_XFLAGS) && 553 (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 554 (vap->va_xflags & XFS_XFLAG_REALTIME)) { 555 code = XFS_ERROR(EINVAL); /* EFBIG? */ 556 goto error_return; 557 } 558 559 /* 560 * Extent size must be a multiple of the appropriate block 561 * size, if set at all. 562 */ 563 if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) { 564 xfs_extlen_t size; 565 566 if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) || 567 ((mask & XFS_AT_XFLAGS) && 568 (vap->va_xflags & XFS_XFLAG_REALTIME))) { 569 size = mp->m_sb.sb_rextsize << 570 mp->m_sb.sb_blocklog; 571 } else { 572 size = mp->m_sb.sb_blocksize; 573 } 574 if (vap->va_extsize % size) { 575 code = XFS_ERROR(EINVAL); 576 goto error_return; 577 } 578 } 579 /* 580 * If realtime flag is set then must have realtime data. 581 */ 582 if ((mask & XFS_AT_XFLAGS) && 583 (vap->va_xflags & XFS_XFLAG_REALTIME)) { 584 if ((mp->m_sb.sb_rblocks == 0) || 585 (mp->m_sb.sb_rextsize == 0) || 586 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { 587 code = XFS_ERROR(EINVAL); 588 goto error_return; 589 } 590 } 591 592 /* 593 * Can't modify an immutable/append-only file unless 594 * we have appropriate permission. 595 */ 596 if ((mask & XFS_AT_XFLAGS) && 597 (ip->i_d.di_flags & 598 (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || 599 (vap->va_xflags & 600 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && 601 !capable(CAP_LINUX_IMMUTABLE)) { 602 code = XFS_ERROR(EPERM); 603 goto error_return; 604 } 605 } 606 607 /* 608 * Now we can make the changes. Before we join the inode 609 * to the transaction, if XFS_AT_SIZE is set then take care of 610 * the part of the truncation that must be done without the 611 * inode lock. This needs to be done before joining the inode 612 * to the transaction, because the inode cannot be unlocked 613 * once it is a part of the transaction. 614 */ 615 if (mask & XFS_AT_SIZE) { 616 code = 0; 617 if ((vap->va_size > ip->i_d.di_size) && 618 (flags & ATTR_NOSIZETOK) == 0) { 619 code = xfs_igrow_start(ip, vap->va_size, credp); 620 } 621 xfs_iunlock(ip, XFS_ILOCK_EXCL); 622 vn_iowait(vp); /* wait for the completion of any pending DIOs */ 623 if (!code) 624 code = xfs_itruncate_data(ip, vap->va_size); 625 if (code) { 626 ASSERT(tp == NULL); 627 lock_flags &= ~XFS_ILOCK_EXCL; 628 ASSERT(lock_flags == XFS_IOLOCK_EXCL); 629 goto error_return; 630 } 631 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 632 if ((code = xfs_trans_reserve(tp, 0, 633 XFS_ITRUNCATE_LOG_RES(mp), 0, 634 XFS_TRANS_PERM_LOG_RES, 635 XFS_ITRUNCATE_LOG_COUNT))) { 636 xfs_trans_cancel(tp, 0); 637 if (need_iolock) 638 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 639 return code; 640 } 641 commit_flags = XFS_TRANS_RELEASE_LOG_RES; 642 xfs_ilock(ip, XFS_ILOCK_EXCL); 643 } 644 645 if (tp) { 646 xfs_trans_ijoin(tp, ip, lock_flags); 647 xfs_trans_ihold(tp, ip); 648 } 649 650 /* determine whether mandatory locking mode changes */ 651 mandlock_before = MANDLOCK(vp, ip->i_d.di_mode); 652 653 /* 654 * Truncate file. Must have write permission and not be a directory. 655 */ 656 if (mask & XFS_AT_SIZE) { 657 if (vap->va_size > ip->i_d.di_size) { 658 xfs_igrow_finish(tp, ip, vap->va_size, 659 !(flags & ATTR_DMI)); 660 } else if ((vap->va_size <= ip->i_d.di_size) || 661 ((vap->va_size == 0) && ip->i_d.di_nextents)) { 662 /* 663 * signal a sync transaction unless 664 * we're truncating an already unlinked 665 * file on a wsync filesystem 666 */ 667 code = xfs_itruncate_finish(&tp, ip, 668 (xfs_fsize_t)vap->va_size, 669 XFS_DATA_FORK, 670 ((ip->i_d.di_nlink != 0 || 671 !(mp->m_flags & XFS_MOUNT_WSYNC)) 672 ? 1 : 0)); 673 if (code) { 674 goto abort_return; 675 } 676 } 677 /* 678 * Have to do this even if the file's size doesn't change. 679 */ 680 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 681 } 682 683 /* 684 * Change file access modes. 685 */ 686 if (mask & XFS_AT_MODE) { 687 ip->i_d.di_mode &= S_IFMT; 688 ip->i_d.di_mode |= vap->va_mode & ~S_IFMT; 689 690 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 691 timeflags |= XFS_ICHGTIME_CHG; 692 } 693 694 /* 695 * Change file ownership. Must be the owner or privileged. 696 * If the system was configured with the "restricted_chown" 697 * option, the owner is not permitted to give away the file, 698 * and can change the group id only to a group of which he 699 * or she is a member. 700 */ 701 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 702 /* 703 * CAP_FSETID overrides the following restrictions: 704 * 705 * The set-user-ID and set-group-ID bits of a file will be 706 * cleared upon successful return from chown() 707 */ 708 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && 709 !capable(CAP_FSETID)) { 710 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); 711 } 712 713 /* 714 * Change the ownerships and register quota modifications 715 * in the transaction. 716 */ 717 if (iuid != uid) { 718 if (XFS_IS_UQUOTA_ON(mp)) { 719 ASSERT(mask & XFS_AT_UID); 720 ASSERT(udqp); 721 olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 722 &ip->i_udquot, udqp); 723 } 724 ip->i_d.di_uid = uid; 725 } 726 if (igid != gid) { 727 if (XFS_IS_GQUOTA_ON(mp)) { 728 ASSERT(!XFS_IS_PQUOTA_ON(mp)); 729 ASSERT(mask & XFS_AT_GID); 730 ASSERT(gdqp); 731 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 732 &ip->i_gdquot, gdqp); 733 } 734 ip->i_d.di_gid = gid; 735 } 736 if (iprojid != projid) { 737 if (XFS_IS_PQUOTA_ON(mp)) { 738 ASSERT(!XFS_IS_GQUOTA_ON(mp)); 739 ASSERT(mask & XFS_AT_PROJID); 740 ASSERT(gdqp); 741 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 742 &ip->i_gdquot, gdqp); 743 } 744 ip->i_d.di_projid = projid; 745 /* 746 * We may have to rev the inode as well as 747 * the superblock version number since projids didn't 748 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. 749 */ 750 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) 751 xfs_bump_ino_vers2(tp, ip); 752 } 753 754 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 755 timeflags |= XFS_ICHGTIME_CHG; 756 } 757 758 759 /* 760 * Change file access or modified times. 761 */ 762 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 763 if (mask & XFS_AT_ATIME) { 764 ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec; 765 ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec; 766 ip->i_update_core = 1; 767 //timeflags &= ~XFS_ICHGTIME_ACC; 768 } 769 if (mask & XFS_AT_MTIME) { 770 ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec; 771 ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec; 772 timeflags &= ~XFS_ICHGTIME_MOD; 773 timeflags |= XFS_ICHGTIME_CHG; 774 } 775 if (tp && (flags & ATTR_UTIME)) 776 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 777 } 778 779 /* 780 * Change XFS-added attributes. 781 */ 782 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) { 783 if (mask & XFS_AT_EXTSIZE) { 784 /* 785 * Converting bytes to fs blocks. 786 */ 787 ip->i_d.di_extsize = vap->va_extsize >> 788 mp->m_sb.sb_blocklog; 789 } 790 if (mask & XFS_AT_XFLAGS) { 791 uint di_flags; 792 793 /* can't set PREALLOC this way, just preserve it */ 794 di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); 795 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) 796 di_flags |= XFS_DIFLAG_IMMUTABLE; 797 if (vap->va_xflags & XFS_XFLAG_APPEND) 798 di_flags |= XFS_DIFLAG_APPEND; 799 if (vap->va_xflags & XFS_XFLAG_SYNC) 800 di_flags |= XFS_DIFLAG_SYNC; 801 if (vap->va_xflags & XFS_XFLAG_NOATIME) 802 di_flags |= XFS_DIFLAG_NOATIME; 803 if (vap->va_xflags & XFS_XFLAG_NODUMP) 804 di_flags |= XFS_DIFLAG_NODUMP; 805 if (vap->va_xflags & XFS_XFLAG_PROJINHERIT) 806 di_flags |= XFS_DIFLAG_PROJINHERIT; 807 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 808 if (vap->va_xflags & XFS_XFLAG_RTINHERIT) 809 di_flags |= XFS_DIFLAG_RTINHERIT; 810 if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS) 811 di_flags |= XFS_DIFLAG_NOSYMLINKS; 812 if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT) 813 di_flags |= XFS_DIFLAG_EXTSZINHERIT; 814 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 815 if (vap->va_xflags & XFS_XFLAG_REALTIME) { 816 di_flags |= XFS_DIFLAG_REALTIME; 817 ip->i_iocore.io_flags |= XFS_IOCORE_RT; 818 } else { 819 ip->i_iocore.io_flags &= ~XFS_IOCORE_RT; 820 } 821 if (vap->va_xflags & XFS_XFLAG_EXTSIZE) 822 di_flags |= XFS_DIFLAG_EXTSIZE; 823 } 824 ip->i_d.di_flags = di_flags; 825 } 826 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 827 timeflags |= XFS_ICHGTIME_CHG; 828 } 829 830 /* 831 * Change file inode change time only if XFS_AT_CTIME set 832 * AND we have been called by a DMI function. 833 */ 834 835 if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) { 836 ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec; 837 ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec; 838 ip->i_update_core = 1; 839 timeflags &= ~XFS_ICHGTIME_CHG; 840 } 841 842 /* 843 * Send out timestamp changes that need to be set to the 844 * current time. Not done when called by a DMI function. 845 */ 846 if (timeflags && !(flags & ATTR_DMI)) 847 xfs_ichgtime(ip, timeflags); 848 849 XFS_STATS_INC(xs_ig_attrchg); 850 851 /* 852 * If this is a synchronous mount, make sure that the 853 * transaction goes to disk before returning to the user. 854 * This is slightly sub-optimal in that truncates require 855 * two sync transactions instead of one for wsync filesystems. 856 * One for the truncate and one for the timestamps since we 857 * don't want to change the timestamps unless we're sure the 858 * truncate worked. Truncates are less than 1% of the laddis 859 * mix so this probably isn't worth the trouble to optimize. 860 */ 861 code = 0; 862 if (tp) { 863 if (mp->m_flags & XFS_MOUNT_WSYNC) 864 xfs_trans_set_sync(tp); 865 866 code = xfs_trans_commit(tp, commit_flags, NULL); 867 } 868 869 /* 870 * If the (regular) file's mandatory locking mode changed, then 871 * notify the vnode. We do this under the inode lock to prevent 872 * racing calls to vop_vnode_change. 873 */ 874 mandlock_after = MANDLOCK(vp, ip->i_d.di_mode); 875 if (mandlock_before != mandlock_after) { 876 XVOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING, 877 mandlock_after); 878 } 879 880 xfs_iunlock(ip, lock_flags); 881 882 /* 883 * Release any dquot(s) the inode had kept before chown. 884 */ 885 XFS_QM_DQRELE(mp, olddquot1); 886 XFS_QM_DQRELE(mp, olddquot2); 887 XFS_QM_DQRELE(mp, udqp); 888 XFS_QM_DQRELE(mp, gdqp); 889 890 if (code) { 891 return code; 892 } 893 894 if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_ATTRIBUTE) && 895 !(flags & ATTR_DMI)) { 896 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL, 897 NULL, DM_RIGHT_NULL, NULL, NULL, 898 0, 0, AT_DELAY_FLAG(flags)); 899 } 900 return 0; 901 902 abort_return: 903 commit_flags |= XFS_TRANS_ABORT; 904 /* FALLTHROUGH */ 905 error_return: 906 XFS_QM_DQRELE(mp, udqp); 907 XFS_QM_DQRELE(mp, gdqp); 908 if (tp) { 909 xfs_trans_cancel(tp, commit_flags); 910 } 911 if (lock_flags != 0) { 912 xfs_iunlock(ip, lock_flags); 913 } 914 return code; 915} 916 917 918/* 919 * xfs_access 920 * Null conversion from vnode mode bits to inode mode bits, as in efs. 921 */ 922STATIC int 923xfs_access( 924 bhv_desc_t *bdp, 925 accmode_t accmode, 926 cred_t *credp) 927{ 928 xfs_inode_t *ip; 929 int error; 930 931 vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__, 932 (inst_t *)__return_address); 933 934 ip = XFS_BHVTOI(bdp); 935 xfs_ilock(ip, XFS_ILOCK_SHARED); 936 error = xfs_iaccess(ip, accmode, credp); 937 xfs_iunlock(ip, XFS_ILOCK_SHARED); 938 return error; 939} 940 941 942/* 943 * xfs_readlink 944 * 945 */ 946STATIC int 947xfs_readlink( 948 bhv_desc_t *bdp, 949 uio_t *uiop, 950 int ioflags, 951 cred_t *credp) 952{ 953 xfs_inode_t *ip; 954 int count; 955 xfs_off_t offset; 956 int pathlen; 957 xfs_vnode_t *vp; 958 int error = 0; 959 xfs_mount_t *mp; 960 int nmaps; 961 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 962 xfs_daddr_t d; 963 int byte_cnt; 964 int n; 965 xfs_buf_t *bp; 966 967 vp = BHV_TO_VNODE(bdp); 968 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 969 970 ip = XFS_BHVTOI(bdp); 971 mp = ip->i_mount; 972 973 if (XFS_FORCED_SHUTDOWN(mp)) 974 return XFS_ERROR(EIO); 975 976 xfs_ilock(ip, XFS_ILOCK_SHARED); 977 978 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK); 979 980 offset = uiop->uio_offset; 981 count = uiop->uio_resid; 982 983 if (offset < 0) { 984 error = XFS_ERROR(EINVAL); 985 goto error_return; 986 } 987 if (count <= 0) { 988 error = 0; 989 goto error_return; 990 } 991 992 /* 993 * See if the symlink is stored inline. 994 */ 995 pathlen = (int)ip->i_d.di_size; 996 997 if (ip->i_df.if_flags & XFS_IFINLINE) { 998 error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop); 999 } 1000 else { 1001 /* 1002 * Symlink not inline. Call bmap to get it in. 1003 */ 1004 nmaps = SYMLINK_MAPS; 1005 1006 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 1007 0, NULL, 0, mval, &nmaps, NULL, NULL); 1008 1009 if (error) { 1010 goto error_return; 1011 } 1012 1013 for (n = 0; n < nmaps; n++) { 1014 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 1015 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 1016 bp = xfs_buf_read(mp->m_ddev_targp, d, 1017 BTOBB(byte_cnt), 0); 1018 error = XFS_BUF_GETERROR(bp); 1019 if (error) { 1020 xfs_ioerror_alert("xfs_readlink", 1021 ip->i_mount, bp, XFS_BUF_ADDR(bp)); 1022 xfs_buf_relse(bp); 1023 goto error_return; 1024 } 1025 if (pathlen < byte_cnt) 1026 byte_cnt = pathlen; 1027 pathlen -= byte_cnt; 1028 1029 error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop); 1030 xfs_buf_relse (bp); 1031 } 1032 1033 } 1034 1035error_return: 1036 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1037 return error; 1038} 1039 1040 1041/* 1042 * xfs_fsync 1043 * 1044 * This is called to sync the inode and its data out to disk. 1045 * We need to hold the I/O lock while flushing the data, and 1046 * the inode lock while flushing the inode. The inode lock CANNOT 1047 * be held while flushing the data, so acquire after we're done 1048 * with that. 1049 */ 1050STATIC int 1051xfs_fsync( 1052 bhv_desc_t *bdp, 1053 int flag, 1054 cred_t *credp, 1055 xfs_off_t start, 1056 xfs_off_t stop) 1057{ 1058 xfs_inode_t *ip; 1059 xfs_trans_t *tp; 1060 int error; 1061 int log_flushed = 0, changed = 1; 1062 1063 vn_trace_entry(BHV_TO_VNODE(bdp), 1064 __FUNCTION__, (inst_t *)__return_address); 1065 1066 ip = XFS_BHVTOI(bdp); 1067 1068 ASSERT(start >= 0 && stop >= -1); 1069 1070 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 1071 return XFS_ERROR(EIO); 1072 1073 /* 1074 * We always need to make sure that the required inode state 1075 * is safe on disk. The vnode might be clean but because 1076 * of committed transactions that haven't hit the disk yet. 1077 * Likewise, there could be unflushed non-transactional 1078 * changes to the inode core that have to go to disk. 1079 * 1080 * The following code depends on one assumption: that 1081 * any transaction that changes an inode logs the core 1082 * because it has to change some field in the inode core 1083 * (typically nextents or nblocks). That assumption 1084 * implies that any transactions against an inode will 1085 * catch any non-transactional updates. If inode-altering 1086 * transactions exist that violate this assumption, the 1087 * code breaks. Right now, it figures that if the involved 1088 * update_* field is clear and the inode is unpinned, the 1089 * inode is clean. Either it's been flushed or it's been 1090 * committed and the commit has hit the disk unpinning the inode. 1091 * (Note that xfs_inode_item_format() called at commit clears 1092 * the update_* fields.) 1093 */ 1094 xfs_ilock(ip, XFS_ILOCK_SHARED); 1095 1096 /* If we are flushing data then we care about update_size 1097 * being set, otherwise we care about update_core 1098 */ 1099 if ((flag & FSYNC_DATA) ? 1100 (ip->i_update_size == 0) : 1101 (ip->i_update_core == 0)) { 1102 /* 1103 * Timestamps/size haven't changed since last inode 1104 * flush or inode transaction commit. That means 1105 * either nothing got written or a transaction 1106 * committed which caught the updates. If the 1107 * latter happened and the transaction hasn't 1108 * hit the disk yet, the inode will be still 1109 * be pinned. If it is, force the log. 1110 */ 1111 1112 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1113 1114 if (xfs_ipincount(ip)) { 1115 _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, 1116 XFS_LOG_FORCE | 1117 ((flag & FSYNC_WAIT) 1118 ? XFS_LOG_SYNC : 0), 1119 &log_flushed); 1120 } else { 1121 /* 1122 * If the inode is not pinned and nothing 1123 * has changed we don't need to flush the 1124 * cache. 1125 */ 1126 changed = 0; 1127 } 1128 error = 0; 1129 } else { 1130 /* 1131 * Kick off a transaction to log the inode 1132 * core to get the updates. Make it 1133 * sync if FSYNC_WAIT is passed in (which 1134 * is done by everybody but specfs). The 1135 * sync transaction will also force the log. 1136 */ 1137 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1138 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); 1139 if ((error = xfs_trans_reserve(tp, 0, 1140 XFS_FSYNC_TS_LOG_RES(ip->i_mount), 1141 0, 0, 0))) { 1142 xfs_trans_cancel(tp, 0); 1143 return error; 1144 } 1145 xfs_ilock(ip, XFS_ILOCK_EXCL); 1146 1147 /* 1148 * Note - it's possible that we might have pushed 1149 * ourselves out of the way during trans_reserve 1150 * which would flush the inode. But there's no 1151 * guarantee that the inode buffer has actually 1152 * gone out yet (it's delwri). Plus the buffer 1153 * could be pinned anyway if it's part of an 1154 * inode in another recent transaction. So we 1155 * play it safe and fire off the transaction anyway. 1156 */ 1157 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1158 xfs_trans_ihold(tp, ip); 1159 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1160 if (flag & FSYNC_WAIT) 1161 xfs_trans_set_sync(tp); 1162 error = _xfs_trans_commit(tp, 0, NULL, &log_flushed); 1163 1164 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1165 } 1166 1167 if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) { 1168 /* 1169 * If the log write didn't issue an ordered tag we need 1170 * to flush the disk cache for the data device now. 1171 */ 1172 if (!log_flushed) 1173 xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); 1174 1175 /* 1176 * If this inode is on the RT dev we need to flush that 1177 * cache as well. 1178 */ 1179 if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) 1180 xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); 1181 } 1182 1183 return error; 1184} 1185 1186/* 1187 * This is called by xfs_inactive to free any blocks beyond eof, 1188 * when the link count isn't zero. 1189 */ 1190STATIC int 1191xfs_inactive_free_eofblocks( 1192 xfs_mount_t *mp, 1193 xfs_inode_t *ip) 1194{ 1195 xfs_trans_t *tp; 1196 int error; 1197 xfs_fileoff_t end_fsb; 1198 xfs_fileoff_t last_fsb; 1199 xfs_filblks_t map_len; 1200 int nimaps; 1201 xfs_bmbt_irec_t imap; 1202 1203 /* 1204 * Figure out if there are any blocks beyond the end 1205 * of the file. If not, then there is nothing to do. 1206 */ 1207 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_d.di_size)); 1208 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 1209 map_len = last_fsb - end_fsb; 1210 if (map_len <= 0) 1211 return 0; 1212 1213 nimaps = 1; 1214 xfs_ilock(ip, XFS_ILOCK_SHARED); 1215 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0, 1216 NULL, 0, &imap, &nimaps, NULL, NULL); 1217 xfs_iunlock(ip, XFS_ILOCK_SHARED); 1218 1219 if (!error && (nimaps != 0) && 1220 (imap.br_startblock != HOLESTARTBLOCK || 1221 ip->i_delayed_blks)) { 1222 /* 1223 * Attach the dquots to the inode up front. 1224 */ 1225 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 1226 return error; 1227 1228 /* 1229 * There are blocks after the end of file. 1230 * Free them up now by truncating the file to 1231 * its current size. 1232 */ 1233 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1234 1235 /* 1236 * Do the xfs_itruncate_start() call before 1237 * reserving any log space because 1238 * itruncate_start will call into the buffer 1239 * cache and we can't 1240 * do that within a transaction. 1241 */ 1242 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1243 xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 1244 ip->i_d.di_size); 1245 1246 error = xfs_trans_reserve(tp, 0, 1247 XFS_ITRUNCATE_LOG_RES(mp), 1248 0, XFS_TRANS_PERM_LOG_RES, 1249 XFS_ITRUNCATE_LOG_COUNT); 1250 if (error) { 1251 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1252 xfs_trans_cancel(tp, 0); 1253 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1254 return error; 1255 } 1256 1257 xfs_ilock(ip, XFS_ILOCK_EXCL); 1258 xfs_trans_ijoin(tp, ip, 1259 XFS_IOLOCK_EXCL | 1260 XFS_ILOCK_EXCL); 1261 xfs_trans_ihold(tp, ip); 1262 1263 error = xfs_itruncate_finish(&tp, ip, 1264 ip->i_d.di_size, 1265 XFS_DATA_FORK, 1266 0); 1267 /* 1268 * If we get an error at this point we 1269 * simply don't bother truncating the file. 1270 */ 1271 if (error) { 1272 xfs_trans_cancel(tp, 1273 (XFS_TRANS_RELEASE_LOG_RES | 1274 XFS_TRANS_ABORT)); 1275 } else { 1276 error = xfs_trans_commit(tp, 1277 XFS_TRANS_RELEASE_LOG_RES, 1278 NULL); 1279 } 1280 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1281 } 1282 return error; 1283} 1284 1285/* 1286 * Free a symlink that has blocks associated with it. 1287 */ 1288STATIC int 1289xfs_inactive_symlink_rmt( 1290 xfs_inode_t *ip, 1291 xfs_trans_t **tpp) 1292{ 1293 xfs_buf_t *bp; 1294 int committed; 1295 int done; 1296 int error; 1297 xfs_fsblock_t first_block; 1298 xfs_bmap_free_t free_list; 1299 int i; 1300 xfs_mount_t *mp; 1301 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 1302 int nmaps; 1303 xfs_trans_t *ntp; 1304 int size; 1305 xfs_trans_t *tp; 1306 1307 tp = *tpp; 1308 mp = ip->i_mount; 1309 ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip)); 1310 /* 1311 * We're freeing a symlink that has some 1312 * blocks allocated to it. Free the 1313 * blocks here. We know that we've got 1314 * either 1 or 2 extents and that we can 1315 * free them all in one bunmapi call. 1316 */ 1317 ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2); 1318 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 1319 XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) { 1320 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1321 xfs_trans_cancel(tp, 0); 1322 *tpp = NULL; 1323 return error; 1324 } 1325 /* 1326 * Lock the inode, fix the size, and join it to the transaction. 1327 * Hold it so in the normal path, we still have it locked for 1328 * the second transaction. In the error paths we need it 1329 * held so the cancel won't rele it, see below. 1330 */ 1331 xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1332 size = (int)ip->i_d.di_size; 1333 ip->i_d.di_size = 0; 1334 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1335 xfs_trans_ihold(tp, ip); 1336 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1337 /* 1338 * Find the block(s) so we can inval and unmap them. 1339 */ 1340 done = 0; 1341 XFS_BMAP_INIT(&free_list, &first_block); 1342 nmaps = sizeof(mval) / sizeof(mval[0]); 1343 if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size), 1344 XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps, 1345 &free_list, NULL))) 1346 goto error0; 1347 /* 1348 * Invalidate the block(s). 1349 */ 1350 for (i = 0; i < nmaps; i++) { 1351 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, 1352 XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), 1353 XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); 1354 xfs_trans_binval(tp, bp); 1355 } 1356 /* 1357 * Unmap the dead block(s) to the free_list. 1358 */ 1359 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, 1360 &first_block, &free_list, NULL, &done))) 1361 goto error1; 1362 ASSERT(done); 1363 /* 1364 * Commit the first transaction. This logs the EFI and the inode. 1365 */ 1366 if ((error = xfs_bmap_finish(&tp, &free_list, first_block, &committed))) 1367 goto error1; 1368 /* 1369 * The transaction must have been committed, since there were 1370 * actually extents freed by xfs_bunmapi. See xfs_bmap_finish. 1371 * The new tp has the extent freeing and EFDs. 1372 */ 1373 ASSERT(committed); 1374 /* 1375 * The first xact was committed, so add the inode to the new one. 1376 * Mark it dirty so it will be logged and moved forward in the log as 1377 * part of every commit. 1378 */ 1379 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1380 xfs_trans_ihold(tp, ip); 1381 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1382 /* 1383 * Get a new, empty transaction to return to our caller. 1384 */ 1385 ntp = xfs_trans_dup(tp); 1386 /* 1387 * Commit the transaction containing extent freeing and EFDs. 1388 * If we get an error on the commit here or on the reserve below, 1389 * we need to unlock the inode since the new transaction doesn't 1390 * have the inode attached. 1391 */ 1392 error = xfs_trans_commit(tp, 0, NULL); 1393 tp = ntp; 1394 if (error) { 1395 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1396 goto error0; 1397 } 1398 /* 1399 * Remove the memory for extent descriptions (just bookkeeping). 1400 */ 1401 if (ip->i_df.if_bytes) 1402 xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK); 1403 ASSERT(ip->i_df.if_bytes == 0); 1404 /* 1405 * Put an itruncate log reservation in the new transaction 1406 * for our caller. 1407 */ 1408 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 1409 XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) { 1410 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1411 goto error0; 1412 } 1413 /* 1414 * Return with the inode locked but not joined to the transaction. 1415 */ 1416 *tpp = tp; 1417 return 0; 1418 1419 error1: 1420 xfs_bmap_cancel(&free_list); 1421 error0: 1422 /* 1423 * Have to come here with the inode locked and either 1424 * (held and in the transaction) or (not in the transaction). 1425 * If the inode isn't held then cancel would iput it, but 1426 * that's wrong since this is inactive and the vnode ref 1427 * count is 0 already. 1428 * Cancel won't do anything to the inode if held, but it still 1429 * needs to be locked until the cancel is done, if it was 1430 * joined to the transaction. 1431 */ 1432 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1433 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1434 *tpp = NULL; 1435 return error; 1436 1437} 1438 1439STATIC int 1440xfs_inactive_symlink_local( 1441 xfs_inode_t *ip, 1442 xfs_trans_t **tpp) 1443{ 1444 int error; 1445 1446 ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip)); 1447 /* 1448 * We're freeing a symlink which fit into 1449 * the inode. Just free the memory used 1450 * to hold the old symlink. 1451 */ 1452 error = xfs_trans_reserve(*tpp, 0, 1453 XFS_ITRUNCATE_LOG_RES(ip->i_mount), 1454 0, XFS_TRANS_PERM_LOG_RES, 1455 XFS_ITRUNCATE_LOG_COUNT); 1456 1457 if (error) { 1458 xfs_trans_cancel(*tpp, 0); 1459 *tpp = NULL; 1460 return error; 1461 } 1462 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1463 1464 /* 1465 * Zero length symlinks _can_ exist. 1466 */ 1467 if (ip->i_df.if_bytes > 0) { 1468 xfs_idata_realloc(ip, 1469 -(ip->i_df.if_bytes), 1470 XFS_DATA_FORK); 1471 ASSERT(ip->i_df.if_bytes == 0); 1472 } 1473 return 0; 1474} 1475 1476/* 1477 * 1478 */ 1479STATIC int 1480xfs_inactive_attrs( 1481 xfs_inode_t *ip, 1482 xfs_trans_t **tpp) 1483{ 1484 xfs_trans_t *tp; 1485 int error; 1486 xfs_mount_t *mp; 1487 1488 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); 1489 tp = *tpp; 1490 mp = ip->i_mount; 1491 ASSERT(ip->i_d.di_forkoff != 0); 1492 xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 1493 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1494 1495 error = xfs_attr_inactive(ip); 1496 if (error) { 1497 *tpp = NULL; 1498 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1499 return error; /* goto out */ 1500 } 1501 1502 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1503 error = xfs_trans_reserve(tp, 0, 1504 XFS_IFREE_LOG_RES(mp), 1505 0, XFS_TRANS_PERM_LOG_RES, 1506 XFS_INACTIVE_LOG_COUNT); 1507 if (error) { 1508 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1509 xfs_trans_cancel(tp, 0); 1510 *tpp = NULL; 1511 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1512 return error; 1513 } 1514 1515 xfs_ilock(ip, XFS_ILOCK_EXCL); 1516 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1517 xfs_trans_ihold(tp, ip); 1518 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 1519 1520 ASSERT(ip->i_d.di_anextents == 0); 1521 1522 *tpp = tp; 1523 return 0; 1524} 1525 1526STATIC int 1527xfs_release( 1528 bhv_desc_t *bdp) 1529{ 1530 xfs_inode_t *ip; 1531 xfs_vnode_t *vp; 1532 xfs_mount_t *mp; 1533 int error; 1534 1535 vp = BHV_TO_VNODE(bdp); 1536 ip = XFS_BHVTOI(bdp); 1537 1538 if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) { 1539 return 0; 1540 } 1541 1542 /* If this is a read-only mount, don't do this (would generate I/O) */ 1543 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 1544 return 0; 1545 1546#ifdef HAVE_REFCACHE 1547 /* If we are in the NFS reference cache then don't do this now */ 1548 if (ip->i_refcache) 1549 return 0; 1550#endif 1551 1552 mp = ip->i_mount; 1553 1554 if (ip->i_d.di_nlink != 0) { 1555 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1556 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || 1557 ip->i_delayed_blks > 0)) && 1558 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 1559 (!(ip->i_d.di_flags & 1560 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { 1561 if ((error = xfs_inactive_free_eofblocks(mp, ip))) 1562 return error; 1563 1564#ifdef RMC /* Update linux inode block count after free above */ 1565 vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, 1566 ip->i_d.di_nblocks + ip->i_delayed_blks); 1567#endif 1568 } 1569 } 1570 1571 return 0; 1572} 1573 1574/* 1575 * xfs_inactive 1576 * 1577 * This is called when the vnode reference count for the vnode 1578 * goes to zero. If the file has been unlinked, then it must 1579 * now be truncated. Also, we clear all of the read-ahead state 1580 * kept for the inode here since the file is now closed. 1581 */ 1582STATIC int 1583xfs_inactive( 1584 bhv_desc_t *bdp, 1585 cred_t *credp) 1586{ 1587 xfs_inode_t *ip; 1588 xfs_vnode_t *vp; 1589 1590 xfs_bmap_free_t free_list; 1591 xfs_fsblock_t first_block; 1592 int committed; 1593 xfs_trans_t *tp; 1594 xfs_mount_t *mp; 1595 int error; 1596 int truncate; 1597 1598 vp = BHV_TO_VNODE(bdp); 1599 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 1600 1601 ip = XFS_BHVTOI(bdp); 1602 1603 /* 1604 * If the inode is already free, then there can be nothing 1605 * to clean up here. 1606 */ 1607 if (ip->i_d.di_mode == 0 || VN_BAD(vp)) { 1608 ASSERT(ip->i_df.if_real_bytes == 0); 1609 ASSERT(ip->i_df.if_broot_bytes == 0); 1610 return VN_INACTIVE_CACHE; 1611 } 1612 1613 /* 1614 * Only do a truncate if it's a regular file with 1615 * some actual space in it. It's OK to look at the 1616 * inode's fields without the lock because we're the 1617 * only one with a reference to the inode. 1618 */ 1619 truncate = ((ip->i_d.di_nlink == 0) && 1620 ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) || 1621 (ip->i_delayed_blks > 0)) && 1622 ((ip->i_d.di_mode & S_IFMT) == S_IFREG)); 1623 1624 mp = ip->i_mount; 1625 1626 if (ip->i_d.di_nlink == 0 && 1627 DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_DESTROY)) { 1628 (void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL); 1629 } 1630 1631 error = 0; 1632 1633 /* If this is a read-only mount, don't do this (would generate I/O) */ 1634 if (vp->v_vfsp->vfs_flag & VFS_RDONLY) 1635 goto out; 1636 1637 if (ip->i_d.di_nlink != 0) { 1638 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1639 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || 1640 ip->i_delayed_blks > 0)) && 1641 (ip->i_df.if_flags & XFS_IFEXTENTS) && 1642 (!(ip->i_d.di_flags & 1643 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || 1644 (ip->i_delayed_blks != 0)))) { 1645 if ((error = xfs_inactive_free_eofblocks(mp, ip))) 1646 return VN_INACTIVE_CACHE; 1647#ifdef RMC 1648 /* Update linux inode block count after free above */ 1649 vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, 1650 ip->i_d.di_nblocks + ip->i_delayed_blks); 1651#endif 1652 } 1653 goto out; 1654 } 1655 1656 ASSERT(ip->i_d.di_nlink == 0); 1657 1658 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 1659 return VN_INACTIVE_CACHE; 1660 1661 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1662 if (truncate) { 1663 /* 1664 * Do the xfs_itruncate_start() call before 1665 * reserving any log space because itruncate_start 1666 * will call into the buffer cache and we can't 1667 * do that within a transaction. 1668 */ 1669 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1670 1671 xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); 1672 1673 error = xfs_trans_reserve(tp, 0, 1674 XFS_ITRUNCATE_LOG_RES(mp), 1675 0, XFS_TRANS_PERM_LOG_RES, 1676 XFS_ITRUNCATE_LOG_COUNT); 1677 if (error) { 1678 /* Don't call itruncate_cleanup */ 1679 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1680 xfs_trans_cancel(tp, 0); 1681 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1682 return VN_INACTIVE_CACHE; 1683 } 1684 1685 xfs_ilock(ip, XFS_ILOCK_EXCL); 1686 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1687 xfs_trans_ihold(tp, ip); 1688 1689 /* 1690 * normally, we have to run xfs_itruncate_finish sync. 1691 * But if filesystem is wsync and we're in the inactive 1692 * path, then we know that nlink == 0, and that the 1693 * xaction that made nlink == 0 is permanently committed 1694 * since xfs_remove runs as a synchronous transaction. 1695 */ 1696 error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1697 (!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0)); 1698 1699 if (error) { 1700 xfs_trans_cancel(tp, 1701 XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1702 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1703 return VN_INACTIVE_CACHE; 1704 } 1705 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) { 1706 1707 /* 1708 * If we get an error while cleaning up a 1709 * symlink we bail out. 1710 */ 1711 error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ? 1712 xfs_inactive_symlink_rmt(ip, &tp) : 1713 xfs_inactive_symlink_local(ip, &tp); 1714 1715 if (error) { 1716 ASSERT(tp == NULL); 1717 return VN_INACTIVE_CACHE; 1718 } 1719 1720 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1721 xfs_trans_ihold(tp, ip); 1722 } else { 1723 error = xfs_trans_reserve(tp, 0, 1724 XFS_IFREE_LOG_RES(mp), 1725 0, XFS_TRANS_PERM_LOG_RES, 1726 XFS_INACTIVE_LOG_COUNT); 1727 if (error) { 1728 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1729 xfs_trans_cancel(tp, 0); 1730 return VN_INACTIVE_CACHE; 1731 } 1732 1733 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1734 xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1735 xfs_trans_ihold(tp, ip); 1736 } 1737 1738 /* 1739 * If there are attributes associated with the file 1740 * then blow them away now. The code calls a routine 1741 * that recursively deconstructs the attribute fork. 1742 * We need to just commit the current transaction 1743 * because we can't use it for xfs_attr_inactive(). 1744 */ 1745 if (ip->i_d.di_anextents > 0) { 1746 error = xfs_inactive_attrs(ip, &tp); 1747 /* 1748 * If we got an error, the transaction is already 1749 * cancelled, and the inode is unlocked. Just get out. 1750 */ 1751 if (error) 1752 return VN_INACTIVE_CACHE; 1753 } else if (ip->i_afp) { 1754 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 1755 } 1756 1757 /* 1758 * Free the inode. 1759 */ 1760 XFS_BMAP_INIT(&free_list, &first_block); 1761 error = xfs_ifree(tp, ip, &free_list); 1762 if (error) { 1763 /* 1764 * If we fail to free the inode, shut down. The cancel 1765 * might do that, we need to make sure. Otherwise the 1766 * inode might be lost for a long time or forever. 1767 */ 1768 if (!XFS_FORCED_SHUTDOWN(mp)) { 1769 cmn_err(CE_NOTE, 1770 "xfs_inactive: xfs_ifree() returned an error = %d on %s", 1771 error, mp->m_fsname); 1772 xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR); 1773 } 1774 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 1775 } else { 1776 /* 1777 * Credit the quota account(s). The inode is gone. 1778 */ 1779 XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1); 1780 1781 /* 1782 * Just ignore errors at this point. There is 1783 * nothing we can do except to try to keep going. 1784 */ 1785 (void) xfs_bmap_finish(&tp, &free_list, first_block, 1786 &committed); 1787 (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 1788 } 1789 /* 1790 * Release the dquots held by inode, if any. 1791 */ 1792 XFS_QM_DQDETACH(mp, ip); 1793 1794 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1795 1796 out: 1797 return VN_INACTIVE_CACHE; 1798} 1799 1800 1801/* 1802 * xfs_lookup 1803 */ 1804STATIC int 1805xfs_lookup( 1806 bhv_desc_t *dir_bdp, 1807 vname_t *dentry, 1808 xfs_vnode_t **vpp, 1809 int flags, 1810 xfs_vnode_t *rdir, 1811 cred_t *credp) 1812{ 1813 xfs_inode_t *dp, *ip; 1814 xfs_ino_t e_inum; 1815 int error; 1816 uint lock_mode; 1817 xfs_vnode_t *dir_vp; 1818 1819 dir_vp = BHV_TO_VNODE(dir_bdp); 1820 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 1821 1822 dp = XFS_BHVTOI(dir_bdp); 1823 1824 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 1825 return XFS_ERROR(EIO); 1826 1827 lock_mode = xfs_ilock_map_shared(dp); 1828 error = xfs_dir_lookup_int(dir_bdp, lock_mode, dentry, &e_inum, &ip); 1829 if (!error) { 1830 *vpp = XFS_ITOV(ip); 1831 ITRACE(ip); 1832 } 1833 xfs_iunlock_map_shared(dp, lock_mode); 1834 return error; 1835} 1836 1837 1838/* 1839 * xfs_create (create a new file). 1840 */ 1841STATIC int 1842xfs_create( 1843 bhv_desc_t *dir_bdp, 1844 vname_t *dentry, 1845 xfs_vattr_t *vap, 1846 xfs_vnode_t **vpp, 1847 cred_t *credp) 1848{ 1849 char *name = VNAME(dentry); 1850 xfs_vnode_t *dir_vp; 1851 xfs_inode_t *dp, *ip; 1852 xfs_vnode_t *vp=NULL; 1853 xfs_trans_t *tp; 1854 xfs_mount_t *mp; 1855 xfs_dev_t rdev; 1856 int error; 1857 xfs_bmap_free_t free_list; 1858 xfs_fsblock_t first_block; 1859 boolean_t dp_joined_to_trans; 1860 int dm_event_sent = 0; 1861 uint cancel_flags; 1862 int committed; 1863 xfs_prid_t prid; 1864 struct xfs_dquot *udqp, *gdqp; 1865 uint resblks; 1866 int dm_di_mode; 1867 int namelen; 1868 1869 ASSERT(!*vpp); 1870 dir_vp = BHV_TO_VNODE(dir_bdp); 1871 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 1872 1873 dp = XFS_BHVTOI(dir_bdp); 1874 mp = dp->i_mount; 1875 1876 dm_di_mode = vap->va_mode; 1877 namelen = VNAMELEN(dentry); 1878 1879 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { 1880 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, 1881 dir_vp, DM_RIGHT_NULL, NULL, 1882 DM_RIGHT_NULL, name, NULL, 1883 dm_di_mode, 0, 0); 1884 1885 if (error) 1886 return error; 1887 dm_event_sent = 1; 1888 } 1889 1890 if (XFS_FORCED_SHUTDOWN(mp)) 1891 return XFS_ERROR(EIO); 1892 1893 /* Return through std_return after this point. */ 1894 1895 udqp = gdqp = NULL; 1896 1897 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1898 prid = dp->i_d.di_projid; 1899 else if (vap->va_mask & XFS_AT_PROJID) 1900 prid = (xfs_prid_t)vap->va_projid; 1901 else 1902 prid = (xfs_prid_t)dfltprid; 1903 1904 /* 1905 * Make sure that we have allocated dquot(s) on disk. 1906 */ 1907 error = XFS_QM_DQVOPALLOC(mp, dp, 1908 current_fsuid(credp), current_fsgid(credp), prid, 1909 XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp); 1910 if (error) 1911 goto std_return; 1912 1913 ip = NULL; 1914 dp_joined_to_trans = B_FALSE; 1915 1916 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); 1917 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1918 resblks = XFS_CREATE_SPACE_RES(mp, namelen); 1919 /* 1920 * Initially assume that the file does not exist and 1921 * reserve the resources for that case. If that is not 1922 * the case we'll drop the one we have and get a more 1923 * appropriate transaction later. 1924 */ 1925 error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0, 1926 XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); 1927 if (error == ENOSPC) { 1928 resblks = 0; 1929 error = xfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0, 1930 XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); 1931 } 1932 if (error) { 1933 cancel_flags = 0; 1934 dp = NULL; 1935 goto error_return; 1936 } 1937 1938 xfs_ilock(dp, XFS_ILOCK_EXCL); 1939 1940 XFS_BMAP_INIT(&free_list, &first_block); 1941 1942 ASSERT(ip == NULL); 1943 1944 /* 1945 * Reserve disk quota and the inode. 1946 */ 1947 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 1948 if (error) 1949 goto error_return; 1950 1951 if (resblks == 0 && 1952 (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen))) 1953 goto error_return; 1954 rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0; 1955 error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1, 1956 rdev, credp, prid, resblks > 0, 1957 &ip, &committed); 1958 if (error) { 1959 if (error == ENOSPC) 1960 goto error_return; 1961 goto abort_return; 1962 } 1963 ITRACE(ip); 1964 1965 /* 1966 * At this point, we've gotten a newly allocated inode. 1967 * It is locked (and joined to the transaction). 1968 */ 1969 1970 ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); 1971 1972 /* 1973 * Now we join the directory inode to the transaction. 1974 * We do not do it earlier because xfs_dir_ialloc 1975 * might commit the previous transaction (and release 1976 * all the locks). 1977 */ 1978 1979 VN_HOLD(dir_vp); 1980 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1981 dp_joined_to_trans = B_TRUE; 1982 1983 error = XFS_DIR_CREATENAME(mp, tp, dp, name, namelen, ip->i_ino, 1984 &first_block, &free_list, 1985 resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 1986 if (error) { 1987 ASSERT(error != ENOSPC); 1988 goto abort_return; 1989 } 1990 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1991 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1992 1993 /* 1994 * If this is a synchronous mount, make sure that the 1995 * create transaction goes to disk before returning to 1996 * the user. 1997 */ 1998 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 1999 xfs_trans_set_sync(tp); 2000 } 2001 2002 dp->i_gen++; 2003 2004 /* 2005 * Attach the dquot(s) to the inodes and modify them incore. 2006 * These ids of the inode couldn't have changed since the new 2007 * inode has been locked ever since it was created. 2008 */ 2009 XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp); 2010 2011 /* 2012 * xfs_trans_commit normally decrements the vnode ref count 2013 * when it unlocks the inode. Since we want to return the 2014 * vnode to the caller, we bump the vnode ref count now. 2015 */ 2016 IHOLD(ip); 2017 vp = XFS_ITOV(ip); 2018 2019 error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); 2020 if (error) { 2021 xfs_bmap_cancel(&free_list); 2022 goto abort_rele; 2023 } 2024 2025 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2026 if (error) { 2027 IRELE(ip); 2028 tp = NULL; 2029 goto error_return; 2030 } 2031 2032 XFS_QM_DQRELE(mp, udqp); 2033 XFS_QM_DQRELE(mp, gdqp); 2034 2035 /* 2036 * Propagate the fact that the vnode changed after the 2037 * xfs_inode locks have been released. 2038 */ 2039 XVOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3); 2040 2041 *vpp = vp; 2042 2043 /* Fallthrough to std_return with error = 0 */ 2044 2045std_return: 2046 if ( (*vpp || (error != 0 && dm_event_sent != 0)) && 2047 DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp), 2048 DM_EVENT_POSTCREATE)) { 2049 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, 2050 dir_vp, DM_RIGHT_NULL, 2051 *vpp ? vp:NULL, 2052 DM_RIGHT_NULL, name, NULL, 2053 dm_di_mode, error, 0); 2054 } 2055 return error; 2056 2057 abort_return: 2058 cancel_flags |= XFS_TRANS_ABORT; 2059 /* FALLTHROUGH */ 2060 2061 error_return: 2062 if (tp != NULL) 2063 xfs_trans_cancel(tp, cancel_flags); 2064 2065 if (!dp_joined_to_trans && (dp != NULL)) 2066 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2067 XFS_QM_DQRELE(mp, udqp); 2068 XFS_QM_DQRELE(mp, gdqp); 2069 2070 goto std_return; 2071 2072 abort_rele: 2073 /* 2074 * Wait until after the current transaction is aborted to 2075 * release the inode. This prevents recursive transactions 2076 * and deadlocks from xfs_inactive. 2077 */ 2078 cancel_flags |= XFS_TRANS_ABORT; 2079 xfs_trans_cancel(tp, cancel_flags); 2080 IRELE(ip); 2081 2082 XFS_QM_DQRELE(mp, udqp); 2083 XFS_QM_DQRELE(mp, gdqp); 2084 2085 goto std_return; 2086} 2087 2088#ifdef DEBUG 2089/* 2090 * Some counters to see if (and how often) we are hitting some deadlock 2091 * prevention code paths. 2092 */ 2093 2094int xfs_rm_locks; 2095int xfs_rm_lock_delays; 2096int xfs_rm_attempts; 2097#endif 2098 2099/* 2100 * The following routine will lock the inodes associated with the 2101 * directory and the named entry in the directory. The locks are 2102 * acquired in increasing inode number. 2103 * 2104 * If the entry is "..", then only the directory is locked. The 2105 * vnode ref count will still include that from the .. entry in 2106 * this case. 2107 * 2108 * There is a deadlock we need to worry about. If the locked directory is 2109 * in the AIL, it might be blocking up the log. The next inode we lock 2110 * could be already locked by another thread waiting for log space (e.g 2111 * a permanent log reservation with a long running transaction (see 2112 * xfs_itruncate_finish)). To solve this, we must check if the directory 2113 * is in the ail and use lock_nowait. If we can't lock, we need to 2114 * drop the inode lock on the directory and try again. xfs_iunlock will 2115 * potentially push the tail if we were holding up the log. 2116 */ 2117STATIC int 2118xfs_lock_dir_and_entry( 2119 xfs_inode_t *dp, 2120 vname_t *dentry, 2121 xfs_inode_t *ip) /* inode of entry 'name' */ 2122{ 2123 int attempts; 2124 xfs_ino_t e_inum; 2125 xfs_inode_t *ips[2]; 2126 xfs_log_item_t *lp; 2127 2128#ifdef DEBUG 2129 xfs_rm_locks++; 2130#endif 2131 attempts = 0; 2132 2133again: 2134 xfs_ilock(dp, XFS_ILOCK_EXCL); 2135 2136 e_inum = ip->i_ino; 2137 2138 ITRACE(ip); 2139 2140 /* 2141 * We want to lock in increasing inum. Since we've already 2142 * acquired the lock on the directory, we may need to release 2143 * if if the inum of the entry turns out to be less. 2144 */ 2145 if (e_inum > dp->i_ino) { 2146 /* 2147 * We are already in the right order, so just 2148 * lock on the inode of the entry. 2149 * We need to use nowait if dp is in the AIL. 2150 */ 2151 2152 lp = (xfs_log_item_t *)dp->i_itemp; 2153 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 2154 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2155 attempts++; 2156#ifdef DEBUG 2157 xfs_rm_attempts++; 2158#endif 2159 2160 /* 2161 * Unlock dp and try again. 2162 * xfs_iunlock will try to push the tail 2163 * if the inode is in the AIL. 2164 */ 2165 2166 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2167 2168 if ((attempts % 5) == 0) { 2169 delay(1); /* Don't just spin the CPU */ 2170#ifdef DEBUG 2171 xfs_rm_lock_delays++; 2172#endif 2173 } 2174 goto again; 2175 } 2176 } else { 2177 xfs_ilock(ip, XFS_ILOCK_EXCL); 2178 } 2179 } else if (e_inum < dp->i_ino) { 2180 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2181 2182 ips[0] = ip; 2183 ips[1] = dp; 2184 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 2185 } 2186 /* else e_inum == dp->i_ino */ 2187 /* This can happen if we're asked to lock /x/.. 2188 * the entry is "..", which is also the parent directory. 2189 */ 2190 2191 return 0; 2192} 2193 2194#ifdef DEBUG 2195int xfs_locked_n; 2196int xfs_small_retries; 2197int xfs_middle_retries; 2198int xfs_lots_retries; 2199int xfs_lock_delays; 2200#endif 2201 2202/* 2203 * The following routine will lock n inodes in exclusive mode. 2204 * We assume the caller calls us with the inodes in i_ino order. 2205 * 2206 * We need to detect deadlock where an inode that we lock 2207 * is in the AIL and we start waiting for another inode that is locked 2208 * by a thread in a long running transaction (such as truncate). This can 2209 * result in deadlock since the long running trans might need to wait 2210 * for the inode we just locked in order to push the tail and free space 2211 * in the log. 2212 */ 2213void 2214xfs_lock_inodes( 2215 xfs_inode_t **ips, 2216 int inodes, 2217 int first_locked, 2218 uint lock_mode) 2219{ 2220 int attempts = 0, i, j, try_lock; 2221 xfs_log_item_t *lp; 2222 2223 ASSERT(ips && (inodes >= 2)); /* we need at least two */ 2224 2225 if (first_locked) { 2226 try_lock = 1; 2227 i = 1; 2228 } else { 2229 try_lock = 0; 2230 i = 0; 2231 } 2232 2233again: 2234 for (; i < inodes; i++) { 2235 ASSERT(ips[i]); 2236 2237 if (i && (ips[i] == ips[i-1])) /* Already locked */ 2238 continue; 2239 2240 /* 2241 * If try_lock is not set yet, make sure all locked inodes 2242 * are not in the AIL. 2243 * If any are, set try_lock to be used later. 2244 */ 2245 2246 if (!try_lock) { 2247 for (j = (i - 1); j >= 0 && !try_lock; j--) { 2248 lp = (xfs_log_item_t *)ips[j]->i_itemp; 2249 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 2250 try_lock++; 2251 } 2252 } 2253 } 2254 2255 /* 2256 * If any of the previous locks we have locked is in the AIL, 2257 * we must TRY to get the second and subsequent locks. If 2258 * we can't get any, we must release all we have 2259 * and try again. 2260 */ 2261 2262 if (try_lock) { 2263 /* try_lock must be 0 if i is 0. */ 2264 /* 2265 * try_lock means we have an inode locked 2266 * that is in the AIL. 2267 */ 2268 ASSERT(i != 0); 2269 if (!xfs_ilock_nowait(ips[i], lock_mode)) { 2270 attempts++; 2271 2272 /* 2273 * Unlock all previous guys and try again. 2274 * xfs_iunlock will try to push the tail 2275 * if the inode is in the AIL. 2276 */ 2277 2278 for(j = i - 1; j >= 0; j--) { 2279 2280 /* 2281 * Check to see if we've already 2282 * unlocked this one. 2283 * Not the first one going back, 2284 * and the inode ptr is the same. 2285 */ 2286 if ((j != (i - 1)) && ips[j] == 2287 ips[j+1]) 2288 continue; 2289 2290 xfs_iunlock(ips[j], lock_mode); 2291 } 2292 2293 if ((attempts % 5) == 0) { 2294 delay(1); /* Don't just spin the CPU */ 2295#ifdef DEBUG 2296 xfs_lock_delays++; 2297#endif 2298 } 2299 i = 0; 2300 try_lock = 0; 2301 goto again; 2302 } 2303 } else { 2304 xfs_ilock(ips[i], lock_mode); 2305 } 2306 } 2307 2308#ifdef DEBUG 2309 if (attempts) { 2310 if (attempts < 5) xfs_small_retries++; 2311 else if (attempts < 100) xfs_middle_retries++; 2312 else xfs_lots_retries++; 2313 } else { 2314 xfs_locked_n++; 2315 } 2316#endif 2317} 2318 2319#ifdef DEBUG 2320#define REMOVE_DEBUG_TRACE(x) {remove_which_error_return = (x);} 2321int remove_which_error_return = 0; 2322#else /* ! DEBUG */ 2323#define REMOVE_DEBUG_TRACE(x) 2324#endif /* ! DEBUG */ 2325 2326extern int xfs_remove(bhv_desc_t *, bhv_desc_t *, vname_t *, cred_t *); 2327/* 2328 * xfs_remove 2329 * 2330 */ 2331int 2332xfs_remove( 2333 bhv_desc_t *dir_bdp, 2334 bhv_desc_t *vp_bdp, 2335 vname_t *dentry, 2336 cred_t *credp) 2337{ 2338 xfs_vnode_t *dir_vp; 2339 xfs_vnode_t *xvp; 2340 char *name = VNAME(dentry); 2341 xfs_inode_t *dp, *ip; 2342 xfs_trans_t *tp = NULL; 2343 xfs_mount_t *mp; 2344 int error = 0; 2345 xfs_bmap_free_t free_list; 2346 xfs_fsblock_t first_block; 2347 int cancel_flags; 2348 int committed; 2349 int dm_di_mode = 0; 2350 int link_zero; 2351 uint resblks; 2352 int namelen; 2353 2354 dir_vp = BHV_TO_VNODE(dir_bdp); 2355 xvp = BHV_TO_VNODE(vp_bdp); 2356 2357 printf("xfs_remove: dvp %p vp %p\n",dir_vp,xvp); 2358 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 2359 2360 dp = XFS_BHVTOI(dir_bdp); 2361 mp = dp->i_mount; 2362 2363 if (XFS_FORCED_SHUTDOWN(mp)) 2364 return XFS_ERROR(EIO); 2365 2366 namelen = VNAMELEN(dentry); 2367 2368 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { 2369 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, 2370 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 2371 name, NULL, 0, 0, 0); 2372 if (error) 2373 return error; 2374 } 2375 2376 /* From this point on, return through std_return */ 2377 ip = NULL; 2378 2379 /* 2380 * We need to get a reference to ip before we get our log 2381 * reservation. The reason for this is that we cannot call 2382 * xfs_iget for an inode for which we do not have a reference 2383 * once we've acquired a log reservation. This is because the 2384 * inode we are trying to get might be in xfs_inactive going 2385 * for a log reservation. Since we'll have to wait for the 2386 * inactive code to complete before returning from xfs_iget, 2387 * we need to make sure that we don't have log space reserved 2388 * when we call xfs_iget. Instead we get an unlocked reference 2389 * to the inode before getting our log reservation. 2390 */ 2391#ifdef RMC 2392 error = xfs_get_dir_entry(dentry, &ip); 2393#endif 2394 /* FreeBSD has already done the lookup */ 2395 ip = xvp->v_inode; 2396 VN_HOLD(xvp); 2397 2398 if (error) { 2399 REMOVE_DEBUG_TRACE(__LINE__); 2400 goto std_return; 2401 } 2402 2403 dm_di_mode = ip->i_d.di_mode; 2404 2405 vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 2406 2407 ITRACE(ip); 2408 2409 error = XFS_QM_DQATTACH(mp, dp, 0); 2410 if (!error && dp != ip) 2411 error = XFS_QM_DQATTACH(mp, ip, 0); 2412 if (error) { 2413 REMOVE_DEBUG_TRACE(__LINE__); 2414 IRELE(ip); 2415 goto std_return; 2416 } 2417 2418 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); 2419 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2420 /* 2421 * We try to get the real space reservation first, 2422 * allowing for directory btree deletion(s) implying 2423 * possible bmap insert(s). If we can't get the space 2424 * reservation then we use 0 instead, and avoid the bmap 2425 * btree insert(s) in the directory code by, if the bmap 2426 * insert tries to happen, instead trimming the LAST 2427 * block from the directory. 2428 */ 2429 resblks = XFS_REMOVE_SPACE_RES(mp); 2430 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 2431 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2432 if (error == ENOSPC) { 2433 resblks = 0; 2434 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 2435 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2436 } 2437 if (error) { 2438 ASSERT(error != ENOSPC); 2439 REMOVE_DEBUG_TRACE(__LINE__); 2440 xfs_trans_cancel(tp, 0); 2441 IRELE(ip); 2442 return error; 2443 } 2444 2445 error = xfs_lock_dir_and_entry(dp, dentry, ip); 2446 if (error) { 2447 REMOVE_DEBUG_TRACE(__LINE__); 2448 xfs_trans_cancel(tp, cancel_flags); 2449 IRELE(ip); 2450 goto std_return; 2451 } 2452 2453 /* 2454 * At this point, we've gotten both the directory and the entry 2455 * inodes locked. 2456 */ 2457 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2458 if (dp != ip) { 2459 /* 2460 * Increment vnode ref count only in this case since 2461 * there's an extra vnode reference in the case where 2462 * dp == ip. 2463 */ 2464 IHOLD(dp); 2465 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2466 } 2467 2468 /* 2469 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2470 */ 2471 XFS_BMAP_INIT(&free_list, &first_block); 2472 error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, ip->i_ino, 2473 &first_block, &free_list, 0); 2474 if (error) { 2475 ASSERT(error != ENOENT); 2476 REMOVE_DEBUG_TRACE(__LINE__); 2477 goto error1; 2478 } 2479 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2480 2481 dp->i_gen++; 2482 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2483 2484 error = xfs_droplink(tp, ip); 2485 if (error) { 2486 REMOVE_DEBUG_TRACE(__LINE__); 2487 goto error1; 2488 } 2489 2490 /* Determine if this is the last link while 2491 * we are in the transaction. 2492 */ 2493 link_zero = (ip)->i_d.di_nlink==0; 2494 2495 /* 2496 * Take an extra ref on the inode so that it doesn't 2497 * go to xfs_inactive() from within the commit. 2498 */ 2499 IHOLD(ip); 2500 2501 /* 2502 * If this is a synchronous mount, make sure that the 2503 * remove transaction goes to disk before returning to 2504 * the user. 2505 */ 2506 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2507 xfs_trans_set_sync(tp); 2508 } 2509 2510 error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); 2511 if (error) { 2512 REMOVE_DEBUG_TRACE(__LINE__); 2513 goto error_rele; 2514 } 2515 2516 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2517 if (error) { 2518 IRELE(ip); 2519 goto std_return; 2520 } 2521 2522 /* 2523 * Before we drop our extra reference to the inode, purge it 2524 * from the refcache if it is there. By waiting until afterwards 2525 * to do the IRELE, we ensure that we won't go inactive in the 2526 * xfs_refcache_purge_ip routine (although that would be OK). 2527 */ 2528 xfs_refcache_purge_ip(ip); 2529 2530 vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 2531 2532 /* 2533 * Let interposed file systems know about removed links. 2534 */ 2535 XVOP_LINK_REMOVED(XFS_ITOV(ip), dir_vp, link_zero); 2536 2537 IRELE(ip); 2538 2539/* Fall through to std_return with error = 0 */ 2540 std_return: 2541 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, 2542 DM_EVENT_POSTREMOVE)) { 2543 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 2544 dir_vp, DM_RIGHT_NULL, 2545 NULL, DM_RIGHT_NULL, 2546 name, NULL, dm_di_mode, error, 0); 2547 } 2548 return error; 2549 2550 error1: 2551 xfs_bmap_cancel(&free_list); 2552 cancel_flags |= XFS_TRANS_ABORT; 2553 xfs_trans_cancel(tp, cancel_flags); 2554 goto std_return; 2555 2556 error_rele: 2557 /* 2558 * In this case make sure to not release the inode until after 2559 * the current transaction is aborted. Releasing it beforehand 2560 * can cause us to go to xfs_inactive and start a recursive 2561 * transaction which can easily deadlock with the current one. 2562 */ 2563 xfs_bmap_cancel(&free_list); 2564 cancel_flags |= XFS_TRANS_ABORT; 2565 xfs_trans_cancel(tp, cancel_flags); 2566 2567 /* 2568 * Before we drop our extra reference to the inode, purge it 2569 * from the refcache if it is there. By waiting until afterwards 2570 * to do the IRELE, we ensure that we won't go inactive in the 2571 * xfs_refcache_purge_ip routine (although that would be OK). 2572 */ 2573 xfs_refcache_purge_ip(ip); 2574 2575 IRELE(ip); 2576 2577 goto std_return; 2578} 2579 2580 2581/* 2582 * xfs_link 2583 * 2584 */ 2585STATIC int 2586xfs_link( 2587 bhv_desc_t *target_dir_bdp, 2588 xfs_vnode_t *src_vp, 2589 vname_t *dentry, 2590 cred_t *credp) 2591{ 2592 xfs_inode_t *tdp, *sip; 2593 xfs_trans_t *tp; 2594 xfs_mount_t *mp; 2595 xfs_inode_t *ips[2]; 2596 int error; 2597 xfs_bmap_free_t free_list; 2598 xfs_fsblock_t first_block; 2599 int cancel_flags; 2600 int committed; 2601 xfs_vnode_t *target_dir_vp; 2602 int resblks; 2603 char *target_name = VNAME(dentry); 2604 int target_namelen; 2605 2606 target_dir_vp = BHV_TO_VNODE(target_dir_bdp); 2607 vn_trace_entry(target_dir_vp, __FUNCTION__, (inst_t *)__return_address); 2608 vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); 2609 2610 target_namelen = VNAMELEN(dentry); 2611 if (VN_ISDIR(src_vp)) 2612 return XFS_ERROR(EPERM); 2613 2614 sip = xfs_vtoi(src_vp); 2615 tdp = XFS_BHVTOI(target_dir_bdp); 2616 mp = tdp->i_mount; 2617 if (XFS_FORCED_SHUTDOWN(mp)) 2618 return XFS_ERROR(EIO); 2619 2620 if (DM_EVENT_ENABLED(src_vp->v_vfsp, tdp, DM_EVENT_LINK)) { 2621 error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK, 2622 target_dir_vp, DM_RIGHT_NULL, 2623 src_vp, DM_RIGHT_NULL, 2624 target_name, NULL, 0, 0, 0); 2625 if (error) 2626 return error; 2627 } 2628 2629 /* Return through std_return after this point. */ 2630 2631 error = XFS_QM_DQATTACH(mp, sip, 0); 2632 if (!error && sip != tdp) 2633 error = XFS_QM_DQATTACH(mp, tdp, 0); 2634 if (error) 2635 goto std_return; 2636 2637 tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); 2638 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2639 resblks = XFS_LINK_SPACE_RES(mp, target_namelen); 2640 error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0, 2641 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 2642 if (error == ENOSPC) { 2643 resblks = 0; 2644 error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0, 2645 XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); 2646 } 2647 if (error) { 2648 cancel_flags = 0; 2649 goto error_return; 2650 } 2651 2652 if (sip->i_ino < tdp->i_ino) { 2653 ips[0] = sip; 2654 ips[1] = tdp; 2655 } else { 2656 ips[0] = tdp; 2657 ips[1] = sip; 2658 } 2659 2660 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 2661 2662 /* 2663 * Increment vnode ref counts since xfs_trans_commit & 2664 * xfs_trans_cancel will both unlock the inodes and 2665 * decrement the associated ref counts. 2666 */ 2667 VN_HOLD(src_vp); 2668 VN_HOLD(target_dir_vp); 2669 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); 2670 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); 2671 2672 /* 2673 * If the source has too many links, we can't make any more to it. 2674 */ 2675 if (sip->i_d.di_nlink >= XFS_MAXLINK) { 2676 error = XFS_ERROR(EMLINK); 2677 goto error_return; 2678 } 2679 2680 /* 2681 * If we are using project inheritance, we only allow hard link 2682 * creation in our tree when the project IDs are the same; else 2683 * the tree quota mechanism could be circumvented. 2684 */ 2685 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 2686 (tdp->i_d.di_projid != sip->i_d.di_projid))) { 2687 error = XFS_ERROR(EPERM); 2688 goto error_return; 2689 } 2690 2691 if (resblks == 0 && 2692 (error = XFS_DIR_CANENTER(mp, tp, tdp, target_name, 2693 target_namelen))) 2694 goto error_return; 2695 2696 XFS_BMAP_INIT(&free_list, &first_block); 2697 2698 error = XFS_DIR_CREATENAME(mp, tp, tdp, target_name, target_namelen, 2699 sip->i_ino, &first_block, &free_list, 2700 resblks); 2701 if (error) 2702 goto abort_return; 2703 xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2704 tdp->i_gen++; 2705 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 2706 2707 error = xfs_bumplink(tp, sip); 2708 if (error) { 2709 goto abort_return; 2710 } 2711 2712 /* 2713 * If this is a synchronous mount, make sure that the 2714 * link transaction goes to disk before returning to 2715 * the user. 2716 */ 2717 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2718 xfs_trans_set_sync(tp); 2719 } 2720 2721 error = xfs_bmap_finish (&tp, &free_list, first_block, &committed); 2722 if (error) { 2723 xfs_bmap_cancel(&free_list); 2724 goto abort_return; 2725 } 2726 2727 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2728 if (error) { 2729 goto std_return; 2730 } 2731 2732 /* Fall through to std_return with error = 0. */ 2733std_return: 2734 if (DM_EVENT_ENABLED(src_vp->v_vfsp, sip, 2735 DM_EVENT_POSTLINK)) { 2736 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK, 2737 target_dir_vp, DM_RIGHT_NULL, 2738 src_vp, DM_RIGHT_NULL, 2739 target_name, NULL, 0, error, 0); 2740 } 2741 return error; 2742 2743 abort_return: 2744 cancel_flags |= XFS_TRANS_ABORT; 2745 /* FALLTHROUGH */ 2746 2747 error_return: 2748 xfs_trans_cancel(tp, cancel_flags); 2749 goto std_return; 2750} 2751/* 2752 * xfs_mkdir 2753 * 2754 */ 2755STATIC int 2756xfs_mkdir( 2757 bhv_desc_t *dir_bdp, 2758 vname_t *dentry, 2759 xfs_vattr_t *vap, 2760 xfs_vnode_t **vpp, 2761 cred_t *credp) 2762{ 2763 char *dir_name = VNAME(dentry); 2764 xfs_inode_t *dp; 2765 xfs_inode_t *cdp; /* inode of created dir */ 2766 xfs_vnode_t *cvp; /* vnode of created dir */ 2767 xfs_trans_t *tp; 2768 xfs_mount_t *mp; 2769 int cancel_flags; 2770 int error; 2771 int committed; 2772 xfs_bmap_free_t free_list; 2773 xfs_fsblock_t first_block; 2774 xfs_vnode_t *dir_vp; 2775 boolean_t dp_joined_to_trans; 2776 boolean_t created = B_FALSE; 2777 int dm_event_sent = 0; 2778 xfs_prid_t prid; 2779 struct xfs_dquot *udqp, *gdqp; 2780 uint resblks; 2781 int dm_di_mode; 2782 int dir_namelen; 2783 2784 dir_vp = BHV_TO_VNODE(dir_bdp); 2785 dp = XFS_BHVTOI(dir_bdp); 2786 mp = dp->i_mount; 2787 2788 if (XFS_FORCED_SHUTDOWN(mp)) 2789 return XFS_ERROR(EIO); 2790 2791 dir_namelen = VNAMELEN(dentry); 2792 2793 tp = NULL; 2794 dp_joined_to_trans = B_FALSE; 2795 dm_di_mode = vap->va_mode; 2796 2797 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { 2798 error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, 2799 dir_vp, DM_RIGHT_NULL, NULL, 2800 DM_RIGHT_NULL, dir_name, NULL, 2801 dm_di_mode, 0, 0); 2802 if (error) 2803 return error; 2804 dm_event_sent = 1; 2805 } 2806 2807 /* Return through std_return after this point. */ 2808 2809 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 2810 2811 mp = dp->i_mount; 2812 udqp = gdqp = NULL; 2813 2814 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 2815 prid = dp->i_d.di_projid; 2816 else if (vap->va_mask & XFS_AT_PROJID) 2817 prid = (xfs_prid_t)vap->va_projid; 2818 else 2819 prid = (xfs_prid_t)dfltprid; 2820 2821 /* 2822 * Make sure that we have allocated dquot(s) on disk. 2823 */ 2824 error = XFS_QM_DQVOPALLOC(mp, dp, 2825 current_fsuid(credp), current_fsgid(credp), prid, 2826 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 2827 if (error) 2828 goto std_return; 2829 2830 tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); 2831 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2832 resblks = XFS_MKDIR_SPACE_RES(mp, dir_namelen); 2833 error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0, 2834 XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT); 2835 if (error == ENOSPC) { 2836 resblks = 0; 2837 error = xfs_trans_reserve(tp, 0, XFS_MKDIR_LOG_RES(mp), 0, 2838 XFS_TRANS_PERM_LOG_RES, 2839 XFS_MKDIR_LOG_COUNT); 2840 } 2841 if (error) { 2842 cancel_flags = 0; 2843 dp = NULL; 2844 goto error_return; 2845 } 2846 2847 xfs_ilock(dp, XFS_ILOCK_EXCL); 2848 2849 /* 2850 * Check for directory link count overflow. 2851 */ 2852 if (dp->i_d.di_nlink >= XFS_MAXLINK) { 2853 error = XFS_ERROR(EMLINK); 2854 goto error_return; 2855 } 2856 2857 /* 2858 * Reserve disk quota and the inode. 2859 */ 2860 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 2861 if (error) 2862 goto error_return; 2863 2864 if (resblks == 0 && 2865 (error = XFS_DIR_CANENTER(mp, tp, dp, dir_name, dir_namelen))) 2866 goto error_return; 2867 /* 2868 * create the directory inode. 2869 */ 2870 error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2, 2871 0, credp, prid, resblks > 0, 2872 &cdp, NULL); 2873 if (error) { 2874 if (error == ENOSPC) 2875 goto error_return; 2876 goto abort_return; 2877 } 2878 ITRACE(cdp); 2879 2880 /* 2881 * Now we add the directory inode to the transaction. 2882 * We waited until now since xfs_dir_ialloc might start 2883 * a new transaction. Had we joined the transaction 2884 * earlier, the locks might have gotten released. 2885 */ 2886 VN_HOLD(dir_vp); 2887 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2888 dp_joined_to_trans = B_TRUE; 2889 2890 XFS_BMAP_INIT(&free_list, &first_block); 2891 2892 error = XFS_DIR_CREATENAME(mp, tp, dp, dir_name, dir_namelen, 2893 cdp->i_ino, &first_block, &free_list, 2894 resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 2895 if (error) { 2896 ASSERT(error != ENOSPC); 2897 goto error1; 2898 } 2899 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2900 2901 /* 2902 * Bump the in memory version number of the parent directory 2903 * so that other processes accessing it will recognize that 2904 * the directory has changed. 2905 */ 2906 dp->i_gen++; 2907 2908 error = XFS_DIR_INIT(mp, tp, cdp, dp); 2909 if (error) { 2910 goto error2; 2911 } 2912 2913 cdp->i_gen = 1; 2914 error = xfs_bumplink(tp, dp); 2915 if (error) { 2916 goto error2; 2917 } 2918 2919 cvp = XFS_ITOV(cdp); 2920 2921 created = B_TRUE; 2922 2923 *vpp = cvp; 2924 IHOLD(cdp); 2925 2926 /* 2927 * Attach the dquots to the new inode and modify the icount incore. 2928 */ 2929 XFS_QM_DQVOPCREATE(mp, tp, cdp, udqp, gdqp); 2930 2931 /* 2932 * If this is a synchronous mount, make sure that the 2933 * mkdir transaction goes to disk before returning to 2934 * the user. 2935 */ 2936 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2937 xfs_trans_set_sync(tp); 2938 } 2939 2940 error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); 2941 if (error) { 2942 IRELE(cdp); 2943 goto error2; 2944 } 2945 2946 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 2947 XFS_QM_DQRELE(mp, udqp); 2948 XFS_QM_DQRELE(mp, gdqp); 2949 if (error) { 2950 IRELE(cdp); 2951 } 2952 2953 /* Fall through to std_return with error = 0 or errno from 2954 * xfs_trans_commit. */ 2955 2956std_return: 2957 if ( (created || (error != 0 && dm_event_sent != 0)) && 2958 DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp), 2959 DM_EVENT_POSTCREATE)) { 2960 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, 2961 dir_vp, DM_RIGHT_NULL, 2962 created ? XFS_ITOV(cdp):NULL, 2963 DM_RIGHT_NULL, 2964 dir_name, NULL, 2965 dm_di_mode, error, 0); 2966 } 2967 return error; 2968 2969 error2: 2970 error1: 2971 xfs_bmap_cancel(&free_list); 2972 abort_return: 2973 cancel_flags |= XFS_TRANS_ABORT; 2974 error_return: 2975 xfs_trans_cancel(tp, cancel_flags); 2976 XFS_QM_DQRELE(mp, udqp); 2977 XFS_QM_DQRELE(mp, gdqp); 2978 2979 if (!dp_joined_to_trans && (dp != NULL)) { 2980 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2981 } 2982 2983 goto std_return; 2984} 2985 2986 2987/* 2988 * xfs_rmdir 2989 * 2990 */ 2991STATIC int 2992xfs_rmdir( 2993 bhv_desc_t *dir_bdp, 2994 vname_t *dentry, 2995 cred_t *credp) 2996{ 2997 char *name = VNAME(dentry); 2998 xfs_inode_t *dp; 2999 xfs_inode_t *cdp; /* child directory */ 3000 xfs_trans_t *tp; 3001 xfs_mount_t *mp; 3002 int error; 3003 xfs_bmap_free_t free_list; 3004 xfs_fsblock_t first_block; 3005 int cancel_flags; 3006 int committed; 3007 xfs_vnode_t *dir_vp; 3008 int dm_di_mode = 0; 3009 int last_cdp_link; 3010 int namelen; 3011 uint resblks; 3012 3013 dir_vp = BHV_TO_VNODE(dir_bdp); 3014 dp = XFS_BHVTOI(dir_bdp); 3015 mp = dp->i_mount; 3016 3017 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 3018 3019 if (XFS_FORCED_SHUTDOWN(XFS_BHVTOI(dir_bdp)->i_mount)) 3020 return XFS_ERROR(EIO); 3021 namelen = VNAMELEN(dentry); 3022 3023 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { 3024 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, 3025 dir_vp, DM_RIGHT_NULL, 3026 NULL, DM_RIGHT_NULL, 3027 name, NULL, 0, 0, 0); 3028 if (error) 3029 return XFS_ERROR(error); 3030 } 3031 3032 /* Return through std_return after this point. */ 3033 3034 cdp = NULL; 3035 3036 /* 3037 * We need to get a reference to cdp before we get our log 3038 * reservation. The reason for this is that we cannot call 3039 * xfs_iget for an inode for which we do not have a reference 3040 * once we've acquired a log reservation. This is because the 3041 * inode we are trying to get might be in xfs_inactive going 3042 * for a log reservation. Since we'll have to wait for the 3043 * inactive code to complete before returning from xfs_iget, 3044 * we need to make sure that we don't have log space reserved 3045 * when we call xfs_iget. Instead we get an unlocked reference 3046 * to the inode before getting our log reservation. 3047 */ 3048 error = xfs_get_dir_entry(dentry, &cdp); 3049 if (error) { 3050 REMOVE_DEBUG_TRACE(__LINE__); 3051 goto std_return; 3052 } 3053 mp = dp->i_mount; 3054 dm_di_mode = cdp->i_d.di_mode; 3055 3056 /* 3057 * Get the dquots for the inodes. 3058 */ 3059 error = XFS_QM_DQATTACH(mp, dp, 0); 3060 if (!error && dp != cdp) 3061 error = XFS_QM_DQATTACH(mp, cdp, 0); 3062 if (error) { 3063 IRELE(cdp); 3064 REMOVE_DEBUG_TRACE(__LINE__); 3065 goto std_return; 3066 } 3067 3068 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); 3069 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 3070 /* 3071 * We try to get the real space reservation first, 3072 * allowing for directory btree deletion(s) implying 3073 * possible bmap insert(s). If we can't get the space 3074 * reservation then we use 0 instead, and avoid the bmap 3075 * btree insert(s) in the directory code by, if the bmap 3076 * insert tries to happen, instead trimming the LAST 3077 * block from the directory. 3078 */ 3079 resblks = XFS_REMOVE_SPACE_RES(mp); 3080 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 3081 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT); 3082 if (error == ENOSPC) { 3083 resblks = 0; 3084 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 3085 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT); 3086 } 3087 if (error) { 3088 ASSERT(error != ENOSPC); 3089 cancel_flags = 0; 3090 IRELE(cdp); 3091 goto error_return; 3092 } 3093 XFS_BMAP_INIT(&free_list, &first_block); 3094 3095 /* 3096 * Now lock the child directory inode and the parent directory 3097 * inode in the proper order. This will take care of validating 3098 * that the directory entry for the child directory inode has 3099 * not changed while we were obtaining a log reservation. 3100 */ 3101 error = xfs_lock_dir_and_entry(dp, dentry, cdp); 3102 if (error) { 3103 xfs_trans_cancel(tp, cancel_flags); 3104 IRELE(cdp); 3105 goto std_return; 3106 } 3107 3108 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 3109 if (dp != cdp) { 3110 /* 3111 * Only increment the parent directory vnode count if 3112 * we didn't bump it in looking up cdp. The only time 3113 * we don't bump it is when we're looking up ".". 3114 */ 3115 VN_HOLD(dir_vp); 3116 } 3117 3118 ITRACE(cdp); 3119 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL); 3120 3121 ASSERT(cdp->i_d.di_nlink >= 2); 3122 if (cdp->i_d.di_nlink != 2) { 3123 error = XFS_ERROR(ENOTEMPTY); 3124 goto error_return; 3125 } 3126 if (!XFS_DIR_ISEMPTY(mp, cdp)) { 3127 error = XFS_ERROR(ENOTEMPTY); 3128 goto error_return; 3129 } 3130 3131 error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, cdp->i_ino, 3132 &first_block, &free_list, resblks); 3133 if (error) { 3134 goto error1; 3135 } 3136 3137 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3138 3139 /* 3140 * Bump the in memory generation count on the parent 3141 * directory so that other can know that it has changed. 3142 */ 3143 dp->i_gen++; 3144 3145 /* 3146 * Drop the link from cdp's "..". 3147 */ 3148 error = xfs_droplink(tp, dp); 3149 if (error) { 3150 goto error1; 3151 } 3152 3153 /* 3154 * Drop the link from dp to cdp. 3155 */ 3156 error = xfs_droplink(tp, cdp); 3157 if (error) { 3158 goto error1; 3159 } 3160 3161 /* 3162 * Drop the "." link from cdp to self. 3163 */ 3164 error = xfs_droplink(tp, cdp); 3165 if (error) { 3166 goto error1; 3167 } 3168 3169 /* Determine these before committing transaction */ 3170 last_cdp_link = (cdp)->i_d.di_nlink==0; 3171 3172 /* 3173 * Take an extra ref on the child vnode so that it 3174 * does not go to xfs_inactive() from within the commit. 3175 */ 3176 IHOLD(cdp); 3177 3178 /* 3179 * If this is a synchronous mount, make sure that the 3180 * rmdir transaction goes to disk before returning to 3181 * the user. 3182 */ 3183 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 3184 xfs_trans_set_sync(tp); 3185 } 3186 3187 error = xfs_bmap_finish (&tp, &free_list, first_block, &committed); 3188 if (error) { 3189 xfs_bmap_cancel(&free_list); 3190 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 3191 XFS_TRANS_ABORT)); 3192 IRELE(cdp); 3193 goto std_return; 3194 } 3195 3196 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 3197 if (error) { 3198 IRELE(cdp); 3199 goto std_return; 3200 } 3201 3202 3203 /* 3204 * Let interposed file systems know about removed links. 3205 */ 3206 XVOP_LINK_REMOVED(XFS_ITOV(cdp), dir_vp, last_cdp_link); 3207 3208 IRELE(cdp); 3209 3210 /* Fall through to std_return with error = 0 or the errno 3211 * from xfs_trans_commit. */ 3212 std_return: 3213 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_POSTREMOVE)) { 3214 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 3215 dir_vp, DM_RIGHT_NULL, 3216 NULL, DM_RIGHT_NULL, 3217 name, NULL, dm_di_mode, 3218 error, 0); 3219 } 3220 return error; 3221 3222 error1: 3223 xfs_bmap_cancel(&free_list); 3224 cancel_flags |= XFS_TRANS_ABORT; 3225 /* FALLTHROUGH */ 3226 3227 error_return: 3228 xfs_trans_cancel(tp, cancel_flags); 3229 goto std_return; 3230} 3231 3232 3233/* 3234 * xfs_readdir 3235 * 3236 * Read dp's entries starting at uiop->uio_offset and translate them into 3237 * bufsize bytes worth of struct dirents starting at bufbase. 3238 */ 3239STATIC int 3240xfs_readdir( 3241 bhv_desc_t *dir_bdp, 3242 uio_t *uiop, 3243 cred_t *credp, 3244 int *eofp) 3245{ 3246 xfs_inode_t *dp; 3247 xfs_trans_t *tp = NULL; 3248 int error = 0; 3249 uint lock_mode; 3250 3251 vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__, 3252 (inst_t *)__return_address); 3253 dp = XFS_BHVTOI(dir_bdp); 3254 3255 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) { 3256 return XFS_ERROR(EIO); 3257 } 3258 3259 lock_mode = xfs_ilock_map_shared(dp); 3260 error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp); 3261 xfs_iunlock_map_shared(dp, lock_mode); 3262 return error; 3263} 3264 3265 3266/* 3267 * xfs_symlink 3268 * 3269 */ 3270STATIC int 3271xfs_symlink( 3272 bhv_desc_t *dir_bdp, 3273 vname_t *dentry, 3274 xfs_vattr_t *vap, 3275 char *target_path, 3276 xfs_vnode_t **vpp, 3277 cred_t *credp) 3278{ 3279 xfs_trans_t *tp; 3280 xfs_mount_t *mp; 3281 xfs_inode_t *dp; 3282 xfs_inode_t *ip; 3283 int error; 3284 int pathlen; 3285 xfs_bmap_free_t free_list; 3286 xfs_fsblock_t first_block; 3287 boolean_t dp_joined_to_trans; 3288 xfs_vnode_t *dir_vp; 3289 uint cancel_flags; 3290 int committed; 3291 xfs_fileoff_t first_fsb; 3292 xfs_filblks_t fs_blocks; 3293 int nmaps; 3294 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 3295 xfs_daddr_t d; 3296 char *cur_chunk; 3297 int byte_cnt; 3298 int n; 3299 xfs_buf_t *bp; 3300 xfs_prid_t prid; 3301 struct xfs_dquot *udqp, *gdqp; 3302 uint resblks; 3303 char *link_name = VNAME(dentry); 3304 int link_namelen; 3305 struct thread *current = curthread; 3306 3307 *vpp = NULL; 3308 dir_vp = BHV_TO_VNODE(dir_bdp); 3309 dp = XFS_BHVTOI(dir_bdp); 3310 dp_joined_to_trans = B_FALSE; 3311 error = 0; 3312 ip = NULL; 3313 tp = NULL; 3314 3315 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 3316 3317 mp = dp->i_mount; 3318 3319 if (XFS_FORCED_SHUTDOWN(mp)) 3320 return XFS_ERROR(EIO); 3321 3322 link_namelen = VNAMELEN(dentry); 3323 3324 /* 3325 * Check component lengths of the target path name. 3326 */ 3327 pathlen = strlen(target_path); 3328 if (pathlen >= MAXPATHLEN) /* total string too long */ 3329 return XFS_ERROR(ENAMETOOLONG); 3330 if (pathlen >= MAXNAMELEN) { /* is any component too long? */ 3331 int len, total; 3332 char *path; 3333 3334 for(total = 0, path = target_path; total < pathlen;) { 3335 /* 3336 * Skip any slashes. 3337 */ 3338 while(*path == '/') { 3339 total++; 3340 path++; 3341 } 3342 3343 /* 3344 * Count up to the next slash or end of path. 3345 * Error out if the component is bigger than MAXNAMELEN. 3346 */ 3347 for(len = 0; *path != '/' && total < pathlen;total++, path++) { 3348 if (++len >= MAXNAMELEN) { 3349 error = ENAMETOOLONG; 3350 return error; 3351 } 3352 } 3353 } 3354 } 3355 3356 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_SYMLINK)) { 3357 error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp, 3358 DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, 3359 link_name, target_path, 0, 0, 0); 3360 if (error) 3361 return error; 3362 } 3363 3364 /* Return through std_return after this point. */ 3365 3366 udqp = gdqp = NULL; 3367 3368#ifdef XXXKAN 3369 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 3370 prid = dp->i_d.di_projid; 3371 else if (vap->va_mask & XFS_AT_PROJID) 3372 prid = (xfs_prid_t)vap->va_projid; 3373 else 3374#endif 3375 prid = (xfs_prid_t)dfltprid; 3376 3377 /* 3378 * Make sure that we have allocated dquot(s) on disk. 3379 */ 3380 error = XFS_QM_DQVOPALLOC(mp, dp, 3381 current->td_ucred->cr_uid, 3382 current->td_ucred->cr_groups[0], 3383 prid, 3384 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 3385 if (error) 3386 goto std_return; 3387 3388 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); 3389 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 3390 /* 3391 * The symlink will fit into the inode data fork? 3392 * There can't be any attributes so we get the whole variable part. 3393 */ 3394 if (pathlen <= XFS_LITINO(mp)) 3395 fs_blocks = 0; 3396 else 3397 fs_blocks = XFS_B_TO_FSB(mp, pathlen); 3398 resblks = XFS_SYMLINK_SPACE_RES(mp, link_namelen, fs_blocks); 3399 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, 3400 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 3401 if (error == ENOSPC && fs_blocks == 0) { 3402 resblks = 0; 3403 error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0, 3404 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 3405 } 3406 if (error) { 3407 cancel_flags = 0; 3408 dp = NULL; 3409 goto error_return; 3410 } 3411 3412 xfs_ilock(dp, XFS_ILOCK_EXCL); 3413 3414 /* 3415 * Check whether the directory allows new symlinks or not. 3416 */ 3417 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { 3418 error = XFS_ERROR(EPERM); 3419 goto error_return; 3420 } 3421 3422 /* 3423 * Reserve disk quota : blocks and inode. 3424 */ 3425 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0); 3426 if (error) 3427 goto error_return; 3428 3429 /* 3430 * Check for ability to enter directory entry, if no space reserved. 3431 */ 3432 if (resblks == 0 && 3433 (error = XFS_DIR_CANENTER(mp, tp, dp, link_name, link_namelen))) 3434 goto error_return; 3435 /* 3436 * Initialize the bmap freelist prior to calling either 3437 * bmapi or the directory create code. 3438 */ 3439 XFS_BMAP_INIT(&free_list, &first_block); 3440 3441 /* 3442 * Allocate an inode for the symlink. 3443 */ 3444 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (vap->va_mode&~S_IFMT), 3445 1, 0, credp, prid, resblks > 0, &ip, NULL); 3446 if (error) { 3447 if (error == ENOSPC) 3448 goto error_return; 3449 goto error1; 3450 } 3451 ITRACE(ip); 3452 3453 VN_HOLD(dir_vp); 3454 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 3455 dp_joined_to_trans = B_TRUE; 3456 3457 /* 3458 * Also attach the dquot(s) to it, if applicable. 3459 */ 3460 XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp); 3461 3462 if (resblks) 3463 resblks -= XFS_IALLOC_SPACE_RES(mp); 3464 /* 3465 * If the symlink will fit into the inode, write it inline. 3466 */ 3467 if (pathlen <= XFS_IFORK_DSIZE(ip)) { 3468 xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK); 3469 memcpy(ip->i_df.if_u1.if_data, target_path, pathlen); 3470 ip->i_d.di_size = pathlen; 3471 3472 /* 3473 * The inode was initially created in extent format. 3474 */ 3475 ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT); 3476 ip->i_df.if_flags |= XFS_IFINLINE; 3477 3478 ip->i_d.di_format = XFS_DINODE_FMT_LOCAL; 3479 xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE); 3480 3481 } else { 3482 first_fsb = 0; 3483 nmaps = SYMLINK_MAPS; 3484 3485 error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, 3486 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, 3487 &first_block, resblks, mval, &nmaps, 3488 &free_list, NULL); 3489 if (error) { 3490 goto error1; 3491 } 3492 3493 if (resblks) 3494 resblks -= fs_blocks; 3495 ip->i_d.di_size = pathlen; 3496 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3497 3498 cur_chunk = target_path; 3499 for (n = 0; n < nmaps; n++) { 3500 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 3501 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 3502 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 3503 BTOBB(byte_cnt), 0); 3504 ASSERT(bp && !XFS_BUF_GETERROR(bp)); 3505 if (pathlen < byte_cnt) { 3506 byte_cnt = pathlen; 3507 } 3508 pathlen -= byte_cnt; 3509 3510 memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt); 3511 cur_chunk += byte_cnt; 3512 3513 xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1); 3514 } 3515 } 3516 3517 /* 3518 * Create the directory entry for the symlink. 3519 */ 3520 error = XFS_DIR_CREATENAME(mp, tp, dp, link_name, link_namelen, 3521 ip->i_ino, &first_block, &free_list, resblks); 3522 if (error) { 3523 goto error1; 3524 } 3525 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3526 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 3527 3528 /* 3529 * Bump the in memory version number of the parent directory 3530 * so that other processes accessing it will recognize that 3531 * the directory has changed. 3532 */ 3533 dp->i_gen++; 3534 3535 /* 3536 * If this is a synchronous mount, make sure that the 3537 * symlink transaction goes to disk before returning to 3538 * the user. 3539 */ 3540 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 3541 xfs_trans_set_sync(tp); 3542 } 3543 3544 /* 3545 * xfs_trans_commit normally decrements the vnode ref count 3546 * when it unlocks the inode. Since we want to return the 3547 * vnode to the caller, we bump the vnode ref count now. 3548 */ 3549 IHOLD(ip); 3550 3551 error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); 3552 if (error) { 3553 goto error2; 3554 } 3555 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 3556 XFS_QM_DQRELE(mp, udqp); 3557 XFS_QM_DQRELE(mp, gdqp); 3558 3559 /* Fall through to std_return with error = 0 or errno from 3560 * xfs_trans_commit */ 3561std_return: 3562 if (DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp), 3563 DM_EVENT_POSTSYMLINK)) { 3564 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK, 3565 dir_vp, DM_RIGHT_NULL, 3566 error ? NULL : XFS_ITOV(ip), 3567 DM_RIGHT_NULL, link_name, target_path, 3568 0, error, 0); 3569 } 3570 3571 if (!error) { 3572 xfs_vnode_t *vp; 3573 3574 ASSERT(ip); 3575 vp = XFS_ITOV(ip); 3576 *vpp = vp; 3577 } 3578 return error; 3579 3580 error2: 3581 IRELE(ip); 3582 error1: 3583 xfs_bmap_cancel(&free_list); 3584 cancel_flags |= XFS_TRANS_ABORT; 3585 error_return: 3586 xfs_trans_cancel(tp, cancel_flags); 3587 XFS_QM_DQRELE(mp, udqp); 3588 XFS_QM_DQRELE(mp, gdqp); 3589 3590 if (!dp_joined_to_trans && (dp != NULL)) { 3591 xfs_iunlock(dp, XFS_ILOCK_EXCL); 3592 } 3593 3594 goto std_return; 3595} 3596 3597 3598/* 3599 * xfs_fid2 3600 * 3601 * A fid routine that takes a pointer to a previously allocated 3602 * fid structure (like xfs_fast_fid) but uses a 64 bit inode number. 3603 */ 3604STATIC int 3605xfs_fid2( 3606 bhv_desc_t *bdp, 3607 fid_t *fidp) 3608{ 3609 xfs_inode_t *ip; 3610 xfs_fid2_t *xfid; 3611 3612 vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__, 3613 (inst_t *)__return_address); 3614 ASSERT(sizeof(xfs_fid_t) >= sizeof(xfs_fid2_t)); 3615 3616 xfid = (xfs_fid2_t *)fidp; 3617 ip = XFS_BHVTOI(bdp); 3618 xfid->fid_len = sizeof(xfs_fid2_t) - sizeof(xfid->fid_len); 3619 xfid->fid_pad = 0; 3620 /* 3621 * use memcpy because the inode is a long long and there's no 3622 * assurance that xfid->fid_ino is properly aligned. 3623 */ 3624 memcpy(&xfid->fid_ino, &ip->i_ino, sizeof(xfid->fid_ino)); 3625 xfid->fid_gen = ip->i_d.di_gen; 3626 3627 return 0; 3628} 3629 3630 3631/* 3632 * xfs_rwlock 3633 */ 3634int 3635xfs_rwlock( 3636 bhv_desc_t *bdp, 3637 vrwlock_t locktype) 3638{ 3639 xfs_inode_t *ip; 3640 xfs_vnode_t *vp; 3641 3642 vp = BHV_TO_VNODE(bdp); 3643 if (VN_ISDIR(vp)) 3644 return 1; 3645 ip = XFS_BHVTOI(bdp); 3646 if (locktype == VRWLOCK_WRITE) { 3647 xfs_ilock(ip, XFS_IOLOCK_EXCL); 3648 } else if (locktype == VRWLOCK_TRY_READ) { 3649 return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED); 3650 } else if (locktype == VRWLOCK_TRY_WRITE) { 3651 return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL); 3652 } else { 3653 ASSERT((locktype == VRWLOCK_READ) || 3654 (locktype == VRWLOCK_WRITE_DIRECT)); 3655 xfs_ilock(ip, XFS_IOLOCK_SHARED); 3656 } 3657 3658 return 1; 3659} 3660 3661 3662/* 3663 * xfs_rwunlock 3664 */ 3665void 3666xfs_rwunlock( 3667 bhv_desc_t *bdp, 3668 vrwlock_t locktype) 3669{ 3670 xfs_inode_t *ip; 3671 xfs_vnode_t *vp; 3672 3673 vp = BHV_TO_VNODE(bdp); 3674 if (VN_ISDIR(vp)) 3675 return; 3676 ip = XFS_BHVTOI(bdp); 3677 if (locktype == VRWLOCK_WRITE) { 3678 /* 3679 * In the write case, we may have added a new entry to 3680 * the reference cache. This might store a pointer to 3681 * an inode to be released in this inode. If it is there, 3682 * clear the pointer and release the inode after unlocking 3683 * this one. 3684 */ 3685 xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL); 3686 } else { 3687 ASSERT((locktype == VRWLOCK_READ) || 3688 (locktype == VRWLOCK_WRITE_DIRECT)); 3689 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 3690 } 3691 return; 3692} 3693 3694STATIC int 3695xfs_inode_flush( 3696 bhv_desc_t *bdp, 3697 int flags) 3698{ 3699 xfs_inode_t *ip; 3700 xfs_mount_t *mp; 3701 xfs_inode_log_item_t *iip; 3702 int error = 0; 3703 3704 ip = XFS_BHVTOI(bdp); 3705 mp = ip->i_mount; 3706 iip = ip->i_itemp; 3707 3708 if (XFS_FORCED_SHUTDOWN(mp)) 3709 return XFS_ERROR(EIO); 3710 3711 /* 3712 * Bypass inodes which have already been cleaned by 3713 * the inode flush clustering code inside xfs_iflush 3714 */ 3715 if ((ip->i_update_core == 0) && 3716 ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) 3717 return 0; 3718 3719 if (flags & FLUSH_LOG) { 3720 if (iip && iip->ili_last_lsn) { 3721 xlog_t *log = mp->m_log; 3722 xfs_lsn_t sync_lsn; 3723 int s, log_flags = XFS_LOG_FORCE; 3724 3725 s = GRANT_LOCK(log); 3726 sync_lsn = log->l_last_sync_lsn; 3727 GRANT_UNLOCK(log, s); 3728 3729 if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0)) 3730 return 0; 3731 3732 if (flags & FLUSH_SYNC) 3733 log_flags |= XFS_LOG_SYNC; 3734 return xfs_log_force(mp, iip->ili_last_lsn, log_flags); 3735 } 3736 } 3737 3738 /* 3739 * We make this non-blocking if the inode is contended, 3740 * return EAGAIN to indicate to the caller that they 3741 * did not succeed. This prevents the flush path from 3742 * blocking on inodes inside another operation right 3743 * now, they get caught later by xfs_sync. 3744 */ 3745 if (flags & FLUSH_INODE) { 3746 int flush_flags; 3747 3748 if (xfs_ipincount(ip)) 3749 return EAGAIN; 3750 3751 if (flags & FLUSH_SYNC) { 3752 xfs_ilock(ip, XFS_ILOCK_SHARED); 3753 xfs_iflock(ip); 3754 } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 3755 if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { 3756 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3757 return EAGAIN; 3758 } 3759 } else { 3760 return EAGAIN; 3761 } 3762 3763 if (flags & FLUSH_SYNC) 3764 flush_flags = XFS_IFLUSH_SYNC; 3765 else 3766 flush_flags = XFS_IFLUSH_ASYNC; 3767 3768 error = xfs_iflush(ip, flush_flags); 3769 xfs_iunlock(ip, XFS_ILOCK_SHARED); 3770 } 3771 3772 return error; 3773} 3774 3775 3776int 3777xfs_set_dmattrs ( 3778 bhv_desc_t *bdp, 3779 u_int evmask, 3780 u_int16_t state, 3781 cred_t *credp) 3782{ 3783 xfs_inode_t *ip; 3784 xfs_trans_t *tp; 3785 xfs_mount_t *mp; 3786 int error; 3787 3788 if (!capable(CAP_SYS_ADMIN)) 3789 return XFS_ERROR(EPERM); 3790 3791 ip = XFS_BHVTOI(bdp); 3792 mp = ip->i_mount; 3793 3794 if (XFS_FORCED_SHUTDOWN(mp)) 3795 return XFS_ERROR(EIO); 3796 3797 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); 3798 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0); 3799 if (error) { 3800 xfs_trans_cancel(tp, 0); 3801 return error; 3802 } 3803 xfs_ilock(ip, XFS_ILOCK_EXCL); 3804 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3805 3806 ip->i_iocore.io_dmevmask = ip->i_d.di_dmevmask = evmask; 3807 ip->i_iocore.io_dmstate = ip->i_d.di_dmstate = state; 3808 3809 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 3810 IHOLD(ip); 3811 error = xfs_trans_commit(tp, 0, NULL); 3812 3813 return error; 3814} 3815 3816 3817/* 3818 * xfs_reclaim 3819 */ 3820STATIC int 3821xfs_reclaim( 3822 bhv_desc_t *bdp) 3823{ 3824 xfs_inode_t *ip; 3825 xfs_vnode_t *vp; 3826 3827 vp = BHV_TO_VNODE(bdp); 3828 ip = XFS_BHVTOI(bdp); 3829 3830 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 3831 3832 ASSERT(!VN_MAPPED(vp)); 3833 3834 /* bad inode, get out here ASAP */ 3835 if (VN_BAD(vp)) { 3836 xfs_ireclaim(ip); 3837 return 0; 3838 } 3839 3840 vn_iowait(vp); 3841 3842 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); 3843 3844 /* 3845 * Make sure the atime in the XFS inode is correct before freeing the 3846 * Linux inode. 3847 */ 3848 xfs_synchronize_atime(ip); 3849 3850 vnode_destroy_vobject(vp->v_vnode); 3851 3852 /* If we have nothing to flush with this inode then complete the 3853 * teardown now, otherwise break the link between the xfs inode 3854 * and the linux inode and clean up the xfs inode later. This 3855 * avoids flushing the inode to disk during the delete operation 3856 * itself. 3857 */ 3858 if (!ip->i_update_core && (ip->i_itemp == NULL)) { 3859 xfs_ilock(ip, XFS_ILOCK_EXCL); 3860 xfs_iflock(ip); 3861 return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); 3862 } else { 3863 xfs_mount_t *mp = ip->i_mount; 3864 3865 /* Protect sync from us */ 3866 XFS_MOUNT_ILOCK(mp); 3867 vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); 3868 TAILQ_INSERT_TAIL(&mp->m_del_inodes, ip, i_reclaim); 3869 ip->i_flags |= XFS_IRECLAIMABLE; 3870 XFS_MOUNT_IUNLOCK(mp); 3871 } 3872 return 0; 3873} 3874 3875int 3876xfs_finish_reclaim( 3877 xfs_inode_t *ip, 3878 int locked, 3879 int sync_mode) 3880{ 3881 xfs_ihash_t *ih = ip->i_hash; 3882 xfs_vnode_t *vp = XFS_ITOV_NULL(ip); 3883 int error; 3884 3885 if (vp && VN_BAD(vp)) 3886 goto reclaim; 3887 3888 /* The hash lock here protects a thread in xfs_iget_core from 3889 * racing with us on linking the inode back with a vnode. 3890 * Once we have the XFS_IRECLAIM flag set it will not touch 3891 * us. 3892 */ 3893 write_lock(&ih->ih_lock); 3894 if ((ip->i_flags & XFS_IRECLAIM) || 3895 (!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) { 3896 write_unlock(&ih->ih_lock); 3897 if (locked) { 3898 xfs_ifunlock(ip); 3899 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3900 } 3901 return 1; 3902 } 3903 ip->i_flags |= XFS_IRECLAIM; 3904 write_unlock(&ih->ih_lock); 3905 3906 /* 3907 * If the inode is still dirty, then flush it out. If the inode 3908 * is not in the AIL, then it will be OK to flush it delwri as 3909 * long as xfs_iflush() does not keep any references to the inode. 3910 * We leave that decision up to xfs_iflush() since it has the 3911 * knowledge of whether it's OK to simply do a delwri flush of 3912 * the inode or whether we need to wait until the inode is 3913 * pulled from the AIL. 3914 * We get the flush lock regardless, though, just to make sure 3915 * we don't free it while it is being flushed. 3916 */ 3917 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 3918 if (!locked) { 3919 xfs_ilock(ip, XFS_ILOCK_EXCL); 3920 xfs_iflock(ip); 3921 } 3922 3923 if (ip->i_update_core || 3924 ((ip->i_itemp != NULL) && 3925 (ip->i_itemp->ili_format.ilf_fields != 0))) { 3926 error = xfs_iflush(ip, sync_mode); 3927 /* 3928 * If we hit an error, typically because of filesystem 3929 * shutdown, we don't need to let vn_reclaim to know 3930 * because we're gonna reclaim the inode anyway. 3931 */ 3932 if (error) { 3933 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3934 goto reclaim; 3935 } 3936 xfs_iflock(ip); /* synchronize with xfs_iflush_done */ 3937 } 3938 3939 ASSERT(ip->i_update_core == 0); 3940 ASSERT(ip->i_itemp == NULL || 3941 ip->i_itemp->ili_format.ilf_fields == 0); 3942 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3943 } else if (locked) { 3944 /* 3945 * We are not interested in doing an iflush if we're 3946 * in the process of shutting down the filesystem forcibly. 3947 * So, just reclaim the inode. 3948 */ 3949 xfs_ifunlock(ip); 3950 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3951 } 3952 3953 reclaim: 3954 xfs_ireclaim(ip); 3955 return 0; 3956} 3957 3958int 3959xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock) 3960{ 3961#ifdef RMC 3962 int purged; 3963 xfs_inode_t *ip, *n; 3964 int done = 0; 3965 3966 while (!done) { 3967 purged = 0; 3968 XFS_MOUNT_ILOCK(mp); 3969 TAILQ_FOREACH_SAFE(curr, &mp->m_del_inodes, i_reclaim, next) { 3970 ip = curr; 3971 if (noblock) { 3972 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) 3973 continue; 3974 if (xfs_ipincount(ip) || 3975 !xfs_iflock_nowait(ip)) { 3976 xfs_iunlock(ip, XFS_ILOCK_EXCL); 3977 continue; 3978 } 3979 } 3980 XFS_MOUNT_IUNLOCK(mp); 3981 if (xfs_finish_reclaim(ip, noblock, 3982 XFS_IFLUSH_DELWRI_ELSE_ASYNC)) 3983 delay(1); 3984 purged = 1; 3985 break; 3986 } 3987 3988 done = !purged; 3989 } 3990 3991 XFS_MOUNT_IUNLOCK(mp); 3992#endif 3993 return 0; 3994} 3995 3996/* 3997 * xfs_alloc_file_space() 3998 * This routine allocates disk space for the given file. 3999 * 4000 * If alloc_type == 0, this request is for an ALLOCSP type 4001 * request which will change the file size. In this case, no 4002 * DMAPI event will be generated by the call. A TRUNCATE event 4003 * will be generated later by xfs_setattr. 4004 * 4005 * If alloc_type != 0, this request is for a RESVSP type 4006 * request, and a DMAPI DM_EVENT_WRITE will be generated if the 4007 * lower block boundary byte address is less than the file's 4008 * length. 4009 * 4010 * RETURNS: 4011 * 0 on success 4012 * errno on error 4013 * 4014 */ 4015STATIC int 4016xfs_alloc_file_space( 4017 xfs_inode_t *ip, 4018 xfs_off_t offset, 4019 xfs_off_t len, 4020 int alloc_type, 4021 int attr_flags) 4022{ 4023 xfs_mount_t *mp = ip->i_mount; 4024 xfs_off_t count; 4025 xfs_filblks_t allocated_fsb; 4026 xfs_filblks_t allocatesize_fsb; 4027 xfs_extlen_t extsz, temp; 4028 xfs_fileoff_t startoffset_fsb; 4029 xfs_fsblock_t firstfsb; 4030 int nimaps; 4031 int bmapi_flag; 4032 int quota_flag; 4033 int rt; 4034 xfs_trans_t *tp; 4035 xfs_bmbt_irec_t imaps[1], *imapp; 4036 xfs_bmap_free_t free_list; 4037 uint qblocks, resblks, resrtextents; 4038 int committed; 4039 int error; 4040 4041 vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); 4042 4043 if (XFS_FORCED_SHUTDOWN(mp)) 4044 return XFS_ERROR(EIO); 4045 4046 rt = XFS_IS_REALTIME_INODE(ip); 4047 if (unlikely(rt)) { 4048 if (!(extsz = ip->i_d.di_extsize)) 4049 extsz = mp->m_sb.sb_rextsize; 4050 } else { 4051 extsz = ip->i_d.di_extsize; 4052 } 4053 4054 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 4055 return error; 4056 4057 if (len <= 0) 4058 return XFS_ERROR(EINVAL); 4059 4060 count = len; 4061 error = 0; 4062 imapp = &imaps[0]; 4063 nimaps = 1; 4064 bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); 4065 startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 4066 allocatesize_fsb = XFS_B_TO_FSB(mp, count); 4067 4068 /* Generate a DMAPI event if needed. */ 4069 if (alloc_type != 0 && offset < ip->i_d.di_size && 4070 (attr_flags&ATTR_DMI) == 0 && 4071 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { 4072 xfs_off_t end_dmi_offset; 4073 4074 end_dmi_offset = offset+len; 4075 if (end_dmi_offset > ip->i_d.di_size) 4076 end_dmi_offset = ip->i_d.di_size; 4077 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), 4078 offset, end_dmi_offset - offset, 4079 0, NULL); 4080 if (error) 4081 return error; 4082 } 4083 4084 /* 4085 * Allocate file space until done or until there is an error 4086 */ 4087retry: 4088 while (allocatesize_fsb && !error) { 4089 xfs_fileoff_t s, e; 4090 4091 /* 4092 * Determine space reservations for data/realtime. 4093 */ 4094 if (unlikely(extsz)) { 4095 s = startoffset_fsb; 4096 do_div(s, extsz); 4097 s *= extsz; 4098 e = startoffset_fsb + allocatesize_fsb; 4099 if ((temp = do_mod(startoffset_fsb, extsz))) 4100 e += temp; 4101 if ((temp = do_mod(e, extsz))) 4102 e += extsz - temp; 4103 } else { 4104 s = 0; 4105 e = allocatesize_fsb; 4106 } 4107 4108 if (unlikely(rt)) { 4109 resrtextents = qblocks = (uint)(e - s); 4110 resrtextents /= mp->m_sb.sb_rextsize; 4111 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 4112 quota_flag = XFS_QMOPT_RES_RTBLKS; 4113 } else { 4114 resrtextents = 0; 4115 resblks = qblocks = \ 4116 XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s)); 4117 quota_flag = XFS_QMOPT_RES_REGBLKS; 4118 } 4119 4120 /* 4121 * Allocate and setup the transaction. 4122 */ 4123 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 4124 error = xfs_trans_reserve(tp, resblks, 4125 XFS_WRITE_LOG_RES(mp), resrtextents, 4126 XFS_TRANS_PERM_LOG_RES, 4127 XFS_WRITE_LOG_COUNT); 4128 /* 4129 * Check for running out of space 4130 */ 4131 if (error) { 4132 /* 4133 * Free the transaction structure. 4134 */ 4135 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 4136 xfs_trans_cancel(tp, 0); 4137 break; 4138 } 4139 xfs_ilock(ip, XFS_ILOCK_EXCL); 4140 error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, 4141 qblocks, 0, quota_flag); 4142 if (error) 4143 goto error1; 4144 4145 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 4146 xfs_trans_ihold(tp, ip); 4147 4148 /* 4149 * Issue the xfs_bmapi() call to allocate the blocks 4150 */ 4151 XFS_BMAP_INIT(&free_list, &firstfsb); 4152 error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, 4153 allocatesize_fsb, bmapi_flag, 4154 &firstfsb, 0, imapp, &nimaps, 4155 &free_list, NULL); 4156 if (error) { 4157 goto error0; 4158 } 4159 4160 /* 4161 * Complete the transaction 4162 */ 4163 error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); 4164 if (error) { 4165 goto error0; 4166 } 4167 4168 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 4169 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4170 if (error) { 4171 break; 4172 } 4173 4174 allocated_fsb = imapp->br_blockcount; 4175 4176 if (nimaps == 0) { 4177 error = XFS_ERROR(ENOSPC); 4178 break; 4179 } 4180 4181 startoffset_fsb += allocated_fsb; 4182 allocatesize_fsb -= allocated_fsb; 4183 } 4184dmapi_enospc_check: 4185 if (error == ENOSPC && (attr_flags&ATTR_DMI) == 0 && 4186 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_NOSPACE)) { 4187 4188 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, 4189 XFS_ITOV(ip), DM_RIGHT_NULL, 4190 XFS_ITOV(ip), DM_RIGHT_NULL, 4191 NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ 4192 if (error == 0) 4193 goto retry; /* Maybe DMAPI app. has made space */ 4194 /* else fall through with error from XFS_SEND_DATA */ 4195 } 4196 4197 return error; 4198 4199error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 4200 xfs_bmap_cancel(&free_list); 4201 XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); 4202 4203error1: /* Just cancel transaction */ 4204 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 4205 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4206 goto dmapi_enospc_check; 4207} 4208 4209/* 4210 * Zero file bytes between startoff and endoff inclusive. 4211 * The iolock is held exclusive and no blocks are buffered. 4212 */ 4213STATIC int 4214xfs_zero_remaining_bytes( 4215 xfs_inode_t *ip, 4216 xfs_off_t startoff, 4217 xfs_off_t endoff) 4218{ 4219 xfs_bmbt_irec_t imap; 4220 xfs_fileoff_t offset_fsb; 4221 xfs_off_t lastoffset; 4222 xfs_off_t offset; 4223 xfs_buf_t *bp; 4224 xfs_mount_t *mp = ip->i_mount; 4225 int nimap; 4226 int error = 0; 4227 4228 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, 4229 ip->i_d.di_flags & XFS_DIFLAG_REALTIME ? 4230 mp->m_rtdev_targp : mp->m_ddev_targp); 4231 4232 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { 4233 offset_fsb = XFS_B_TO_FSBT(mp, offset); 4234 nimap = 1; 4235 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0, 4236 NULL, 0, &imap, &nimap, NULL, NULL); 4237 if (error || nimap < 1) 4238 break; 4239 ASSERT(imap.br_blockcount >= 1); 4240 ASSERT(imap.br_startoff == offset_fsb); 4241 lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; 4242 if (lastoffset > endoff) 4243 lastoffset = endoff; 4244 if (imap.br_startblock == HOLESTARTBLOCK) 4245 continue; 4246 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 4247 if (imap.br_state == XFS_EXT_UNWRITTEN) 4248 continue; 4249 XFS_BUF_UNDONE(bp); 4250 XFS_BUF_UNWRITE(bp); 4251 XFS_BUF_READ(bp); 4252 XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock)); 4253 xfsbdstrat(mp, bp); 4254 if ((error = xfs_iowait(bp))) { 4255 xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", 4256 mp, bp, XFS_BUF_ADDR(bp)); 4257 break; 4258 } 4259 memset(XFS_BUF_PTR(bp) + 4260 (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), 4261 0, lastoffset - offset + 1); 4262 XFS_BUF_UNDONE(bp); 4263 XFS_BUF_UNREAD(bp); 4264 XFS_BUF_WRITE(bp); 4265 xfsbdstrat(mp, bp); 4266 if ((error = xfs_iowait(bp))) { 4267 xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", 4268 mp, bp, XFS_BUF_ADDR(bp)); 4269 break; 4270 } 4271 } 4272 xfs_buf_free(bp); 4273 return error; 4274} 4275 4276/* 4277 * xfs_free_file_space() 4278 * This routine frees disk space for the given file. 4279 * 4280 * This routine is only called by xfs_change_file_space 4281 * for an UNRESVSP type call. 4282 * 4283 * RETURNS: 4284 * 0 on success 4285 * errno on error 4286 * 4287 */ 4288STATIC int 4289xfs_free_file_space( 4290 xfs_inode_t *ip, 4291 xfs_off_t offset, 4292 xfs_off_t len, 4293 int attr_flags) 4294{ 4295 xfs_vnode_t *vp; 4296 int committed; 4297 int done; 4298 xfs_off_t end_dmi_offset; 4299 xfs_fileoff_t endoffset_fsb; 4300 int error; 4301 xfs_fsblock_t firstfsb; 4302 xfs_bmap_free_t free_list; 4303 xfs_off_t ilen; 4304 xfs_bmbt_irec_t imap; 4305 xfs_off_t ioffset; 4306 xfs_extlen_t mod=0; 4307 xfs_mount_t *mp; 4308 int nimap; 4309 uint resblks; 4310 int rounding; 4311 int rt; 4312 xfs_fileoff_t startoffset_fsb; 4313 xfs_trans_t *tp; 4314 int need_iolock = 1; 4315 4316 vp = XFS_ITOV(ip); 4317 mp = ip->i_mount; 4318 4319 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 4320 4321 if ((error = XFS_QM_DQATTACH(mp, ip, 0))) 4322 return error; 4323 4324 error = 0; 4325 if (len <= 0) /* if nothing being freed */ 4326 return error; 4327 rt = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME); 4328 startoffset_fsb = XFS_B_TO_FSB(mp, offset); 4329 end_dmi_offset = offset + len; 4330 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); 4331 4332 if (offset < ip->i_d.di_size && 4333 (attr_flags & ATTR_DMI) == 0 && 4334 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { 4335 if (end_dmi_offset > ip->i_d.di_size) 4336 end_dmi_offset = ip->i_d.di_size; 4337 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, 4338 offset, end_dmi_offset - offset, 4339 AT_DELAY_FLAG(attr_flags), NULL); 4340 if (error) 4341 return error; 4342 } 4343 4344 ASSERT(attr_flags & ATTR_NOLOCK ? attr_flags & ATTR_DMI : 1); 4345 if (attr_flags & ATTR_NOLOCK) 4346 need_iolock = 0; 4347 if (need_iolock) { 4348 xfs_ilock(ip, XFS_IOLOCK_EXCL); 4349 vn_iowait(vp); /* wait for the completion of any pending DIOs */ 4350 } 4351 4352 rounding = MAX((__uint8_t)(1 << mp->m_sb.sb_blocklog), 4353 (__uint8_t)NBPP); 4354 ilen = len + (offset & (rounding - 1)); 4355 ioffset = offset & ~(rounding - 1); 4356 if (ilen & (rounding - 1)) 4357 ilen = (ilen + rounding) & ~(rounding - 1); 4358 4359 if (VN_CACHED(vp) != 0) { 4360 xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, 4361 ctooff(offtoct(ioffset)), -1); 4362 XVOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(ioffset)), 4363 -1, FI_REMAPF_LOCKED); 4364 } 4365 4366 /* 4367 * Need to zero the stuff we're not freeing, on disk. 4368 * If its a realtime file & can't use unwritten extents then we 4369 * actually need to zero the extent edges. Otherwise xfs_bunmapi 4370 * will take care of it for us. 4371 */ 4372 if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { 4373 nimap = 1; 4374 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb, 4375 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 4376 if (error) 4377 goto out_unlock_iolock; 4378 ASSERT(nimap == 0 || nimap == 1); 4379 if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 4380 xfs_daddr_t block; 4381 4382 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 4383 block = imap.br_startblock; 4384 mod = do_div(block, mp->m_sb.sb_rextsize); 4385 if (mod) 4386 startoffset_fsb += mp->m_sb.sb_rextsize - mod; 4387 } 4388 nimap = 1; 4389 error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1, 4390 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); 4391 if (error) 4392 goto out_unlock_iolock; 4393 ASSERT(nimap == 0 || nimap == 1); 4394 if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 4395 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 4396 mod++; 4397 if (mod && (mod != mp->m_sb.sb_rextsize)) 4398 endoffset_fsb -= mod; 4399 } 4400 } 4401 if ((done = (endoffset_fsb <= startoffset_fsb))) 4402 /* 4403 * One contiguous piece to clear 4404 */ 4405 error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1); 4406 else { 4407 /* 4408 * Some full blocks, possibly two pieces to clear 4409 */ 4410 if (offset < XFS_FSB_TO_B(mp, startoffset_fsb)) 4411 error = xfs_zero_remaining_bytes(ip, offset, 4412 XFS_FSB_TO_B(mp, startoffset_fsb) - 1); 4413 if (!error && 4414 XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len) 4415 error = xfs_zero_remaining_bytes(ip, 4416 XFS_FSB_TO_B(mp, endoffset_fsb), 4417 offset + len - 1); 4418 } 4419 4420 /* 4421 * free file space until done or until there is an error 4422 */ 4423 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 4424 while (!error && !done) { 4425 4426 /* 4427 * allocate and setup the transaction 4428 */ 4429 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 4430 error = xfs_trans_reserve(tp, 4431 resblks, 4432 XFS_WRITE_LOG_RES(mp), 4433 0, 4434 XFS_TRANS_PERM_LOG_RES, 4435 XFS_WRITE_LOG_COUNT); 4436 4437 /* 4438 * check for running out of space 4439 */ 4440 if (error) { 4441 /* 4442 * Free the transaction structure. 4443 */ 4444 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 4445 xfs_trans_cancel(tp, 0); 4446 break; 4447 } 4448 xfs_ilock(ip, XFS_ILOCK_EXCL); 4449 error = XFS_TRANS_RESERVE_QUOTA(mp, tp, 4450 ip->i_udquot, ip->i_gdquot, resblks, 0, 4451 XFS_QMOPT_RES_REGBLKS); 4452 if (error) 4453 goto error1; 4454 4455 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 4456 xfs_trans_ihold(tp, ip); 4457 4458 /* 4459 * issue the bunmapi() call to free the blocks 4460 */ 4461 XFS_BMAP_INIT(&free_list, &firstfsb); 4462 error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, 4463 endoffset_fsb - startoffset_fsb, 4464 0, 2, &firstfsb, &free_list, NULL, &done); 4465 if (error) { 4466 goto error0; 4467 } 4468 4469 /* 4470 * complete the transaction 4471 */ 4472 error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); 4473 if (error) { 4474 goto error0; 4475 } 4476 4477 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); 4478 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4479 } 4480 4481 out_unlock_iolock: 4482 if (need_iolock) 4483 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 4484 return error; 4485 4486 error0: 4487 xfs_bmap_cancel(&free_list); 4488 error1: 4489 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 4490 xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) : 4491 XFS_ILOCK_EXCL); 4492 return error; 4493} 4494 4495/* 4496 * xfs_change_file_space() 4497 * This routine allocates or frees disk space for the given file. 4498 * The user specified parameters are checked for alignment and size 4499 * limitations. 4500 * 4501 * RETURNS: 4502 * 0 on success 4503 * errno on error 4504 * 4505 */ 4506int 4507xfs_change_file_space( 4508 bhv_desc_t *bdp, 4509 u_long cmd, 4510 xfs_flock64_t *bf, 4511 xfs_off_t offset, 4512 cred_t *credp, 4513 int attr_flags) 4514{ 4515 int clrprealloc; 4516 int error; 4517 xfs_fsize_t fsize; 4518 xfs_inode_t *ip; 4519 xfs_mount_t *mp; 4520 int setprealloc; 4521 xfs_off_t startoffset; 4522 xfs_off_t llen; 4523 xfs_trans_t *tp; 4524 xfs_vattr_t va; 4525 xfs_vnode_t *vp; 4526 4527 vp = BHV_TO_VNODE(bdp); 4528 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 4529 4530 ip = XFS_BHVTOI(bdp); 4531 mp = ip->i_mount; 4532 4533 /* 4534 * must be a regular file and have write permission 4535 */ 4536 if (!VN_ISREG(vp)) 4537 return XFS_ERROR(EINVAL); 4538 4539 xfs_ilock(ip, XFS_ILOCK_SHARED); 4540 4541 if ((error = xfs_iaccess(ip, VWRITE, credp))) { 4542 xfs_iunlock(ip, XFS_ILOCK_SHARED); 4543 return error; 4544 } 4545 4546 xfs_iunlock(ip, XFS_ILOCK_SHARED); 4547 4548 switch (bf->l_whence) { 4549 case 0: /*SEEK_SET*/ 4550 break; 4551 case 1: /*SEEK_CUR*/ 4552 bf->l_start += offset; 4553 break; 4554 case 2: /*SEEK_END*/ 4555 bf->l_start += ip->i_d.di_size; 4556 break; 4557 default: 4558 return XFS_ERROR(EINVAL); 4559 } 4560 4561 llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len; 4562 4563 if ( (bf->l_start < 0) 4564 || (bf->l_start > XFS_MAXIOFFSET(mp)) 4565 || (bf->l_start + llen < 0) 4566 || (bf->l_start + llen > XFS_MAXIOFFSET(mp))) 4567 return XFS_ERROR(EINVAL); 4568 4569 bf->l_whence = 0; 4570 4571 startoffset = bf->l_start; 4572 fsize = ip->i_d.di_size; 4573 4574 /* 4575 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve 4576 * file space. 4577 * These calls do NOT zero the data space allocated to the file, 4578 * nor do they change the file size. 4579 * 4580 * XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file 4581 * space. 4582 * These calls cause the new file data to be zeroed and the file 4583 * size to be changed. 4584 */ 4585 setprealloc = clrprealloc = 0; 4586 4587 switch (cmd) { 4588 case XFS_IOC_RESVSP: 4589 case XFS_IOC_RESVSP64: 4590 error = xfs_alloc_file_space(ip, startoffset, bf->l_len, 4591 1, attr_flags); 4592 if (error) 4593 return error; 4594 setprealloc = 1; 4595 break; 4596 4597 case XFS_IOC_UNRESVSP: 4598 case XFS_IOC_UNRESVSP64: 4599 if ((error = xfs_free_file_space(ip, startoffset, bf->l_len, 4600 attr_flags))) 4601 return error; 4602 break; 4603 4604 case XFS_IOC_ALLOCSP: 4605 case XFS_IOC_ALLOCSP64: 4606 case XFS_IOC_FREESP: 4607 case XFS_IOC_FREESP64: 4608 if (startoffset > fsize) { 4609 error = xfs_alloc_file_space(ip, fsize, 4610 startoffset - fsize, 0, attr_flags); 4611 if (error) 4612 break; 4613 } 4614 4615 va.va_mask = XFS_AT_SIZE; 4616 va.va_size = startoffset; 4617 4618 error = xfs_setattr(bdp, &va, attr_flags, credp); 4619 4620 if (error) 4621 return error; 4622 4623 clrprealloc = 1; 4624 break; 4625 4626 default: 4627 ASSERT(0); 4628 return XFS_ERROR(EINVAL); 4629 } 4630 4631 /* 4632 * update the inode timestamp, mode, and prealloc flag bits 4633 */ 4634 tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); 4635 4636 if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp), 4637 0, 0, 0))) { 4638 /* ASSERT(0); */ 4639 xfs_trans_cancel(tp, 0); 4640 return error; 4641 } 4642 4643 xfs_ilock(ip, XFS_ILOCK_EXCL); 4644 4645 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 4646 xfs_trans_ihold(tp, ip); 4647 4648 if ((attr_flags & ATTR_DMI) == 0) { 4649 ip->i_d.di_mode &= ~S_ISUID; 4650 4651 /* 4652 * Note that we don't have to worry about mandatory 4653 * file locking being disabled here because we only 4654 * clear the S_ISGID bit if the Group execute bit is 4655 * on, but if it was on then mandatory locking wouldn't 4656 * have been enabled. 4657 */ 4658 if (ip->i_d.di_mode & S_IXGRP) 4659 ip->i_d.di_mode &= ~S_ISGID; 4660 4661 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 4662 } 4663 if (setprealloc) 4664 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; 4665 else if (clrprealloc) 4666 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; 4667 4668 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 4669 xfs_trans_set_sync(tp); 4670 4671 error = xfs_trans_commit(tp, 0, NULL); 4672 4673 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4674 4675 return error; 4676} 4677 4678 4679xfs_vnodeops_t xfs_vnodeops = { 4680 BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS), 4681 .vop_open = xfs_open, 4682 .vop_read = xfs_read, 4683#ifdef HAVE_SENDFILE 4684 .vop_sendfile = xfs_sendfile, 4685#endif 4686 .vop_write = xfs_write, 4687 .vop_ioctl = xfs_ioctl, 4688 .vop_getattr = xfs_getattr, 4689 .vop_setattr = xfs_setattr, 4690 .vop_access = xfs_access, 4691 .vop_lookup = xfs_lookup, 4692 .vop_create = xfs_create, 4693 .vop_remove = xfs_remove, 4694 .vop_link = xfs_link, 4695 .vop_rename = xfs_rename, 4696 .vop_mkdir = xfs_mkdir, 4697 .vop_rmdir = xfs_rmdir, 4698 .vop_readdir = xfs_readdir, 4699 .vop_symlink = xfs_symlink, 4700 .vop_readlink = xfs_readlink, 4701 .vop_fsync = xfs_fsync, 4702 .vop_inactive = xfs_inactive, 4703 .vop_fid2 = xfs_fid2, 4704 .vop_rwlock = xfs_rwlock, 4705 .vop_rwunlock = xfs_rwunlock, 4706 .vop_bmap = xfs_bmap, 4707 .vop_reclaim = xfs_reclaim, 4708 .vop_attr_get = xfs_attr_get, 4709 .vop_attr_set = xfs_attr_set, 4710 .vop_attr_remove = xfs_attr_remove, 4711 .vop_attr_list = xfs_attr_list, 4712 .vop_link_removed = (xfs_vop_link_removed_t)fs_noval, 4713 .vop_vnode_change = (xfs_vop_vnode_change_t)fs_noval, 4714 .vop_tosspages = fs_tosspages, 4715 .vop_flushinval_pages = fs_flushinval_pages, 4716 .vop_flush_pages = fs_flush_pages, 4717 .vop_release = xfs_release, 4718 .vop_iflush = xfs_inode_flush, 4719}; 4720