/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_itable.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_rw.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_utils.h"
#include "xfs_rtalloc.h"
#include "xfs_trans_space.h"
#include "xfs_log_priv.h"
#include "xfs_filestream.h"
#include "xfs_vnodeops.h"
#include "xfs_trace.h"

/*
 * Apply the attribute changes requested in @iattr (selected by the ATTR_*
 * bits in iattr->ia_valid) to inode @ip: ownership, mode, size and/or
 * timestamps.
 *
 * @flags: XFS_ATTR_NOLOCK means the caller already holds the iolock so we
 *	   must not take it here; XFS_ATTR_NOACL suppresses the post-chmod
 *	   ACL update at the bottom of the function.
 *
 * Returns 0 on success or a positive XFS error code.  Note the sign
 * convention: VFS helpers (inode_change_ok, block_truncate_page,
 * xfs_acl_chmod) return negative errnos and are negated on the way in.
 */
int
xfs_setattr(
	struct xfs_inode	*ip,
	struct iattr		*iattr,
	int			flags)
{
	xfs_mount_t		*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
	int			mask = iattr->ia_valid;
	xfs_trans_t		*tp;
	int			code;
	uint			lock_flags;
	uint			commit_flags = 0;
	uid_t			uid = 0, iuid = 0;
	gid_t			gid = 0, igid = 0;
	struct xfs_dquot	*udqp, *gdqp, *olddquot1, *olddquot2;
	int			need_iolock = 1;

	trace_xfs_setattr(ip);

	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return XFS_ERROR(EROFS);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* inode_change_ok() returns a negative errno; flip it positive. */
	code = -inode_change_ok(inode, iattr);
	if (code)
		return code;

	olddquot1 = olddquot2 = NULL;
	udqp = gdqp = NULL;

	/*
	 * If disk quotas is on, we make sure that the dquots do exist on disk,
	 * before we start any other transactions. Trying to do this later
	 * is messy. We don't care to take a readlock to look at the ids
	 * in inode here, because we can't hold it across the trans_reserve.
	 * If the IDs do change before we take the ilock, we're covered
	 * because the i_*dquot fields will get updated anyway.
	 */
	if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
		uint	qflags = 0;

		if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
			uid = iattr->ia_uid;
			qflags |= XFS_QMOPT_UQUOTA;
		} else {
			uid = ip->i_d.di_uid;
		}
		if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
			gid = iattr->ia_gid;
			qflags |= XFS_QMOPT_GQUOTA;
		} else {
			gid = ip->i_d.di_gid;
		}

		/*
		 * We take a reference when we initialize udqp and gdqp,
		 * so it is important that we never blindly double trip on
		 * the same variable. See xfs_create() for an example.
		 */
		ASSERT(udqp == NULL);
		ASSERT(gdqp == NULL);
		code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid,
					 qflags, &udqp, &gdqp);
		if (code)
			return code;
	}

	/*
	 * For the other attributes, we acquire the inode lock and
	 * first do an error checking pass.
	 */
	tp = NULL;
	lock_flags = XFS_ILOCK_EXCL;
	if (flags & XFS_ATTR_NOLOCK)
		need_iolock = 0;
	if (!(mask & ATTR_SIZE)) {
		/*
		 * Non-size changes use a fixed-size log reservation.  The
		 * ATTR_SIZE path allocates its transaction later, after the
		 * parts of the truncate that must run unlocked are done.
		 */
		tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
		commit_flags = 0;
		code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
					 0, 0, 0);
		if (code) {
			lock_flags = 0;
			goto error_return;
		}
	} else {
		if (need_iolock)
			lock_flags |= XFS_IOLOCK_EXCL;
	}

	xfs_ilock(ip, lock_flags);

	/*
	 * Change file ownership.  Must be the owner or privileged.
	 */
	if (mask & (ATTR_UID|ATTR_GID)) {
		/*
		 * These IDs could have changed since we last looked at them.
		 * But, we're assured that if the ownership did change
		 * while we didn't have the inode locked, inode's dquot(s)
		 * would have changed also.
		 */
		iuid = ip->i_d.di_uid;
		igid = ip->i_d.di_gid;
		gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
		uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;

		/*
		 * Do a quota reservation only if uid/gid is actually
		 * going to change.
		 */
		if (XFS_IS_QUOTA_RUNNING(mp) &&
		    ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
		     (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
			ASSERT(tp);
			code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
						capable(CAP_FOWNER) ?
						XFS_QMOPT_FORCE_RES : 0);
			if (code)	/* out of quota */
				goto error_return;
		}
	}

	/*
	 * Truncate file.  Must have write permission and not be a directory.
	 */
	if (mask & ATTR_SIZE) {
		/* Short circuit the truncate case for zero length files */
		if (iattr->ia_size == 0 &&
		    ip->i_size == 0 && ip->i_d.di_nextents == 0) {
			xfs_iunlock(ip, XFS_ILOCK_EXCL);
			lock_flags &= ~XFS_ILOCK_EXCL;
			if (mask & ATTR_CTIME)
				xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
			code = 0;
			/* not an error: shares the unlock/cleanup tail */
			goto error_return;
		}

		if (S_ISDIR(ip->i_d.di_mode)) {
			code = XFS_ERROR(EISDIR);
			goto error_return;
		} else if (!S_ISREG(ip->i_d.di_mode)) {
			code = XFS_ERROR(EINVAL);
			goto error_return;
		}

		/*
		 * Make sure that the dquots are attached to the inode.
		 */
		code = xfs_qm_dqattach_locked(ip, 0);
		if (code)
			goto error_return;

		/*
		 * Now we can make the changes.  Before we join the inode
		 * to the transaction, if ATTR_SIZE is set then take care of
		 * the part of the truncation that must be done without the
		 * inode lock.  This needs to be done before joining the
		 * inode to the transaction, because the inode cannot be
		 * unlocked once it is a part of the transaction.
		 */
		if (iattr->ia_size > ip->i_size) {
			/*
			 * Do the first part of growing a file: zero any data
			 * in the last block that is beyond the old EOF.  We
			 * need to do this before the inode is joined to the
			 * transaction to modify the i_size.
			 */
			code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
			if (code)
				goto error_return;
		}
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		lock_flags &= ~XFS_ILOCK_EXCL;

		/*
		 * We are going to log the inode size change in this
		 * transaction so any previous writes that are beyond the on
		 * disk EOF and the new EOF that have not been written out need
		 * to be written here. If we do not write the data out, we
		 * expose ourselves to the null files problem.
		 *
		 * Only flush from the on disk size to the smaller of the in
		 * memory file size or the new size as that's the range we
		 * really care about here and prevents waiting for other data
		 * not within the range we care about here.
		 */
		if (ip->i_size != ip->i_d.di_size &&
		    iattr->ia_size > ip->i_d.di_size) {
			code = xfs_flush_pages(ip,
					ip->i_d.di_size, iattr->ia_size,
					XBF_ASYNC, FI_NONE);
			if (code)
				goto error_return;
		}

		/* wait for all I/O to complete */
		xfs_ioend_wait(ip);

		code = -block_truncate_page(inode->i_mapping, iattr->ia_size,
					    xfs_get_blocks);
		if (code)
			goto error_return;

		tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
		code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
					 XFS_TRANS_PERM_LOG_RES,
					 XFS_ITRUNCATE_LOG_COUNT);
		if (code)
			goto error_return;

		truncate_setsize(inode, iattr->ia_size);

		/*
		 * From here on the transaction holds a permanent log
		 * reservation, so a cancel must release it; retake the
		 * ilock before joining the inode to the transaction.
		 */
		commit_flags = XFS_TRANS_RELEASE_LOG_RES;
		lock_flags |= XFS_ILOCK_EXCL;

		xfs_ilock(ip, XFS_ILOCK_EXCL);

		xfs_trans_ijoin(tp, ip);

		/*
		 * Only change the c/mtime if we are changing the size
		 * or we are explicitly asked to change it. This handles
		 * the semantic difference between truncate() and ftruncate()
		 * as implemented in the VFS.
		 *
		 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME
		 * is a special case where we need to update the times despite
		 * not having these flags set.  For all other operations the
		 * VFS set these flags explicitly if it wants a timestamp
		 * update.
		 */
		if (iattr->ia_size != ip->i_size &&
		    (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
			iattr->ia_ctime = iattr->ia_mtime =
				current_fs_time(inode->i_sb);
			mask |= ATTR_CTIME | ATTR_MTIME;
		}

		if (iattr->ia_size > ip->i_size) {
			/* Growing: just log the new size, no extent work. */
			ip->i_d.di_size = iattr->ia_size;
			ip->i_size = iattr->ia_size;
			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		} else if (iattr->ia_size <= ip->i_size ||
			   (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
			/*
			 * signal a sync transaction unless
			 * we're truncating an already unlinked
			 * file on a wsync filesystem
			 */
			code = xfs_itruncate_finish(&tp, ip, iattr->ia_size,
					    XFS_DATA_FORK,
					    ((ip->i_d.di_nlink != 0 ||
					      !(mp->m_flags & XFS_MOUNT_WSYNC))
					     ? 1 : 0));
			if (code)
				goto abort_return;
			/*
			 * Truncated "down", so we're removing references
			 * to old data here - if we now delay flushing for
			 * a long time, we expose ourselves unduly to the
			 * notorious NULL files problem.  So, we mark this
			 * vnode and flush it when the file is closed, and
			 * do not wait the usual (long) time for writeout.
			 */
			xfs_iflags_set(ip, XFS_ITRUNCATED);
		}
	} else if (tp) {
		xfs_trans_ijoin(tp, ip);
	}

	/*
	 * Change file ownership.  Must be the owner or privileged.
	 */
	if (mask & (ATTR_UID|ATTR_GID)) {
		/*
		 * CAP_FSETID overrides the following restrictions:
		 *
		 * The set-user-ID and set-group-ID bits of a file will be
		 * cleared upon successful return from chown()
		 */
		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
		    !capable(CAP_FSETID)) {
			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
		}

		/*
		 * Change the ownerships and register quota modifications
		 * in the transaction.
		 */
		if (iuid != uid) {
			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
				ASSERT(mask & ATTR_UID);
				ASSERT(udqp);
				olddquot1 = xfs_qm_vop_chown(tp, ip,
							&ip->i_udquot, udqp);
			}
			ip->i_d.di_uid = uid;
			inode->i_uid = uid;
		}
		if (igid != gid) {
			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
				ASSERT(!XFS_IS_PQUOTA_ON(mp));
				ASSERT(mask & ATTR_GID);
				ASSERT(gdqp);
				olddquot2 = xfs_qm_vop_chown(tp, ip,
							&ip->i_gdquot, gdqp);
			}
			ip->i_d.di_gid = gid;
			inode->i_gid = gid;
		}
	}

	/*
	 * Change file access modes.
	 */
	if (mask & ATTR_MODE) {
		umode_t mode = iattr->ia_mode;

		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
			mode &= ~S_ISGID;

		/* keep the file type bits, replace the permission bits */
		ip->i_d.di_mode &= S_IFMT;
		ip->i_d.di_mode |= mode & ~S_IFMT;

		inode->i_mode &= S_IFMT;
		inode->i_mode |= mode & ~S_IFMT;
	}

	/*
	 * Change file access or modified times.
	 * Each update is mirrored into the VFS inode and the on-disk core,
	 * and i_update_core is set so the core gets written back.
	 */
	if (mask & ATTR_ATIME) {
		inode->i_atime = iattr->ia_atime;
		ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
		ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
		ip->i_update_core = 1;
	}
	if (mask & ATTR_CTIME) {
		inode->i_ctime = iattr->ia_ctime;
		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
		ip->i_update_core = 1;
	}
	if (mask & ATTR_MTIME) {
		inode->i_mtime = iattr->ia_mtime;
		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
		ip->i_update_core = 1;
	}

	/*
	 * And finally, log the inode core if any attribute in it
	 * has been changed.
	 */
	if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
		    ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	XFS_STATS_INC(xs_ig_attrchg);

	/*
	 * If this is a synchronous mount, make sure that the
	 * transaction goes to disk before returning to the user.
	 * This is slightly sub-optimal in that truncates require
	 * two sync transactions instead of one for wsync filesystems.
	 * One for the truncate and one for the timestamps since we
	 * don't want to change the timestamps unless we're sure the
	 * truncate worked.  Truncates are less than 1% of the laddis
	 * mix so this probably isn't worth the trouble to optimize.
	 */
	code = 0;
	if (mp->m_flags & XFS_MOUNT_WSYNC)
		xfs_trans_set_sync(tp);

	code = xfs_trans_commit(tp, commit_flags);

	xfs_iunlock(ip, lock_flags);

	/*
	 * Release any dquot(s) the inode had kept before chown.
	 */
	xfs_qm_dqrele(olddquot1);
	xfs_qm_dqrele(olddquot2);
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	if (code)
		return code;

	if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
		code = -xfs_acl_chmod(inode);
		if (code)
			return XFS_ERROR(code);
	}

	return 0;

 abort_return:
	commit_flags |= XFS_TRANS_ABORT;
 error_return:
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	if (tp) {
		xfs_trans_cancel(tp, commit_flags);
	}
	if (lock_flags != 0) {
		xfs_iunlock(ip, lock_flags);
	}
	return code;
}

/*
 * The maximum pathlen is 1024 bytes. Since the minimum file system
 * blocksize is 512 bytes, we can get a max of 2 extents back from
 * bmapi.
471 */ 472#define SYMLINK_MAPS 2 473 474STATIC int 475xfs_readlink_bmap( 476 xfs_inode_t *ip, 477 char *link) 478{ 479 xfs_mount_t *mp = ip->i_mount; 480 int pathlen = ip->i_d.di_size; 481 int nmaps = SYMLINK_MAPS; 482 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 483 xfs_daddr_t d; 484 int byte_cnt; 485 int n; 486 xfs_buf_t *bp; 487 int error = 0; 488 489 error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0, 490 mval, &nmaps, NULL); 491 if (error) 492 goto out; 493 494 for (n = 0; n < nmaps; n++) { 495 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 496 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 497 498 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 499 XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK); 500 error = XFS_BUF_GETERROR(bp); 501 if (error) { 502 xfs_ioerror_alert("xfs_readlink", 503 ip->i_mount, bp, XFS_BUF_ADDR(bp)); 504 xfs_buf_relse(bp); 505 goto out; 506 } 507 if (pathlen < byte_cnt) 508 byte_cnt = pathlen; 509 pathlen -= byte_cnt; 510 511 memcpy(link, XFS_BUF_PTR(bp), byte_cnt); 512 xfs_buf_relse(bp); 513 } 514 515 link[ip->i_d.di_size] = '\0'; 516 error = 0; 517 518 out: 519 return error; 520} 521 522int 523xfs_readlink( 524 xfs_inode_t *ip, 525 char *link) 526{ 527 xfs_mount_t *mp = ip->i_mount; 528 int pathlen; 529 int error = 0; 530 531 trace_xfs_readlink(ip); 532 533 if (XFS_FORCED_SHUTDOWN(mp)) 534 return XFS_ERROR(EIO); 535 536 xfs_ilock(ip, XFS_ILOCK_SHARED); 537 538 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK); 539 ASSERT(ip->i_d.di_size <= MAXPATHLEN); 540 541 pathlen = ip->i_d.di_size; 542 if (!pathlen) 543 goto out; 544 545 if (ip->i_df.if_flags & XFS_IFINLINE) { 546 memcpy(link, ip->i_df.if_u1.if_data, pathlen); 547 link[pathlen] = '\0'; 548 } else { 549 error = xfs_readlink_bmap(ip, link); 550 } 551 552 out: 553 xfs_iunlock(ip, XFS_ILOCK_SHARED); 554 return error; 555} 556 557/* 558 * Flags for xfs_free_eofblocks 559 */ 560#define XFS_FREE_EOF_TRYLOCK (1<<0) 561 562/* 563 * This is called by xfs_inactive to free any 
 * blocks beyond eof
 * when the link count isn't zero and by xfs_dm_punch_hole() when
 * punching a hole to EOF.
 */
STATIC int
xfs_free_eofblocks(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	int		flags)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_fileoff_t	end_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_filblks_t	map_len;
	int		nimaps;
	xfs_bmbt_irec_t	imap;

	/*
	 * Figure out if there are any blocks beyond the end
	 * of the file.  If not, then there is nothing to do.
	 */
	end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size));
	last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
	if (last_fsb <= end_fsb)
		return 0;
	map_len = last_fsb - end_fsb;

	/* One mapping is enough to tell whether anything is out there. */
	nimaps = 1;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0,
			  NULL, 0, &imap, &nimaps, NULL);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (!error && (nimaps != 0) &&
	    (imap.br_startblock != HOLESTARTBLOCK ||
	     ip->i_delayed_blks)) {
		/*
		 * Attach the dquots to the inode up front.
		 */
		error = xfs_qm_dqattach(ip, 0);
		if (error)
			return error;

		/*
		 * There are blocks after the end of file.
		 * Free them up now by truncating the file to
		 * its current size.
		 */
		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);

		/*
		 * Do the xfs_itruncate_start() call before
		 * reserving any log space because
		 * itruncate_start will call into the buffer
		 * cache and we can't
		 * do that within a transaction.
		 */
		if (flags & XFS_FREE_EOF_TRYLOCK) {
			/*
			 * Trylock mode: if the iolock is contended, give up
			 * quietly (return 0) rather than risk a deadlock --
			 * see the comment at the call site in xfs_release().
			 */
			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
				xfs_trans_cancel(tp, 0);
				return 0;
			}
		} else {
			xfs_ilock(ip, XFS_IOLOCK_EXCL);
		}
		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
				    ip->i_size);
		if (error) {
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return error;
		}

		error = xfs_trans_reserve(tp, 0,
					  XFS_ITRUNCATE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_ITRUNCATE_LOG_COUNT);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return error;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip);

		error = xfs_itruncate_finish(&tp, ip,
					     ip->i_size,
					     XFS_DATA_FORK,
					     0);
		/*
		 * If we get an error at this point we
		 * simply don't bother truncating the file.
		 */
		if (error) {
			xfs_trans_cancel(tp,
					 (XFS_TRANS_RELEASE_LOG_RES |
					  XFS_TRANS_ABORT));
		} else {
			error = xfs_trans_commit(tp,
						XFS_TRANS_RELEASE_LOG_RES);
		}
		xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
	}
	return error;
}

/*
 * Free a symlink that has blocks associated with it.
 *
 * On success, returns with the inode locked (iolock + ilock) and *tpp
 * replaced by a fresh transaction carrying an itruncate reservation;
 * on failure *tpp is NULL and the transaction has been cancelled.
 */
STATIC int
xfs_inactive_symlink_rmt(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_buf_t	*bp;
	int		committed;
	int		done;
	int		error;
	xfs_fsblock_t	first_block;
	xfs_bmap_free_t	free_list;
	int		i;
	xfs_mount_t	*mp;
	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
	int		nmaps;
	xfs_trans_t	*ntp;
	int		size;
	xfs_trans_t	*tp;

	tp = *tpp;
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink that has some
	 * blocks allocated to it.  Free the
	 * blocks here.  We know that we've got
	 * either 1 or 2 extents and that we can
	 * free them all in one bunmapi call.
	 */
	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		xfs_trans_cancel(tp, 0);
		*tpp = NULL;
		return error;
	}
	/*
	 * Lock the inode, fix the size, and join it to the transaction.
	 * Hold it so in the normal path, we still have it locked for
	 * the second transaction.  In the error paths we need it
	 * held so the cancel won't rele it, see below.
	 */
	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	size = (int)ip->i_d.di_size;
	ip->i_d.di_size = 0;
	xfs_trans_ijoin(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Find the block(s) so we can inval and unmap them.
	 */
	done = 0;
	xfs_bmap_init(&free_list, &first_block);
	nmaps = ARRAY_SIZE(mval);
	if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
			XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
			&free_list)))
		goto error0;
	/*
	 * Invalidate the block(s).
	 */
	for (i = 0; i < nmaps; i++) {
		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
		xfs_trans_binval(tp, bp);
	}
	/*
	 * Unmap the dead block(s) to the free_list.
	 */
	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
			&first_block, &free_list, &done)))
		goto error1;
	ASSERT(done);
	/*
	 * Commit the first transaction.  This logs the EFI and the inode.
	 */
	if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
		goto error1;
	/*
	 * The transaction must have been committed, since there were
	 * actually extents freed by xfs_bunmapi.  See xfs_bmap_finish.
	 * The new tp has the extent freeing and EFDs.
	 */
	ASSERT(committed);
	/*
	 * The first xact was committed, so add the inode to the new one.
	 * Mark it dirty so it will be logged and moved forward in the log as
	 * part of every commit.
	 */
	xfs_trans_ijoin(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Get a new, empty transaction to return to our caller.
	 */
	ntp = xfs_trans_dup(tp);
	/*
	 * Commit the transaction containing extent freeing and EFDs.
	 * If we get an error on the commit here or on the reserve below,
	 * we need to unlock the inode since the new transaction doesn't
	 * have the inode attached.
	 */
	error = xfs_trans_commit(tp, 0);
	tp = ntp;
	if (error) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * transaction commit worked ok so we can drop the extra ticket
	 * reference that we gained in xfs_trans_dup()
	 */
	xfs_log_ticket_put(tp->t_ticket);

	/*
	 * Remove the memory for extent descriptions (just bookkeeping).
	 */
	if (ip->i_df.if_bytes)
		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
	ASSERT(ip->i_df.if_bytes == 0);
	/*
	 * Put an itruncate log reservation in the new transaction
	 * for our caller.
	 */
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * Return with the inode locked but not joined to the transaction.
	 */
	*tpp = tp;
	return 0;

 error1:
	xfs_bmap_cancel(&free_list);
 error0:
	/*
	 * Have to come here with the inode locked and either
	 * (held and in the transaction) or (not in the transaction).
	 * If the inode isn't held then cancel would iput it, but
	 * that's wrong since this is inactive and the vnode ref
	 * count is 0 already.
	 * Cancel won't do anything to the inode if held, but it still
	 * needs to be locked until the cancel is done, if it was
	 * joined to the transaction.
	 */
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	*tpp = NULL;
	return error;

}

/*
 * Free a symlink whose value is stored inline in the inode's data fork.
 *
 * On success, returns with the inode locked and *tpp still holding the
 * (now reserved) transaction; on failure *tpp is NULL and the
 * transaction has been cancelled.
 */
STATIC int
xfs_inactive_symlink_local(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	int		error;

	ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink which fit into
	 * the inode.  Just free the memory used
	 * to hold the old symlink.
	 */
	error = xfs_trans_reserve(*tpp, 0,
				  XFS_ITRUNCATE_LOG_RES(ip->i_mount),
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_ITRUNCATE_LOG_COUNT);

	if (error) {
		xfs_trans_cancel(*tpp, 0);
		*tpp = NULL;
		return error;
	}
	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);

	/*
	 * Zero length symlinks _can_ exist.
	 */
	if (ip->i_df.if_bytes > 0) {
		xfs_idata_realloc(ip,
				  -(ip->i_df.if_bytes),
				  XFS_DATA_FORK);
		ASSERT(ip->i_df.if_bytes == 0);
	}
	return 0;
}

/*
 * Tear down the attribute fork during inactivation.
 *
 * Commits the caller's transaction (xfs_attr_inactive() cannot run
 * inside it), removes the attribute fork, then hands back a fresh
 * transaction with an ifree reservation in *tpp.  On error *tpp is
 * NULL, the transaction is cancelled and the inode is unlocked.
 */
STATIC int
xfs_inactive_attrs(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_mount_t	*mp;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	tp = *tpp;
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_forkoff != 0);
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		goto error_unlock;

	error = xfs_attr_inactive(ip);
	if (error)
		goto error_unlock;

	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	error = xfs_trans_reserve(tp, 0,
				  XFS_IFREE_LOG_RES(mp),
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_INACTIVE_LOG_COUNT);
	if (error)
		goto error_cancel;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip);
	xfs_idestroy_fork(ip, XFS_ATTR_FORK);

	ASSERT(ip->i_d.di_anextents == 0);

	*tpp = tp;
	return 0;

error_cancel:
	ASSERT(XFS_FORCED_SHUTDOWN(mp));
	xfs_trans_cancel(tp, 0);
error_unlock:
	*tpp = NULL;
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return error;
}

/*
 * Called on the last close of a file.  For still-linked regular files
 * this initiates early writeout after a truncate-down and opportunistically
 * frees speculative preallocation beyond EOF.  Returns 0 or a positive
 * XFS error code.
 */
int
xfs_release(
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp = ip->i_mount;
	int		error;

	if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
		return 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return 0;

	if (!XFS_FORCED_SHUTDOWN(mp)) {
		int truncated;

		/*
		 * If we are using filestreams, and we have an unlinked
		 * file that we are processing the last close on, then nothing
		 * will be able to reopen and write to this file. Purge this
		 * inode from the filestreams cache so that it doesn't delay
		 * teardown of the inode.
		 */
		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
			xfs_filestream_deassociate(ip);

		/*
		 * If we previously truncated this file and removed old data
		 * in the process, we want to initiate "early" writeout on
		 * the last close. This is an attempt to combat the notorious
		 * NULL files problem which is particularly noticeable from a
		 * truncate down, buffered (re-)write (delalloc), followed by
		 * a crash.  What we are effectively doing here is
		 * significantly reducing the time window where we'd otherwise
		 * be exposed to that problem.
		 */
		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
		if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0)
			xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE);
	}

	if (ip->i_d.di_nlink != 0) {
		/*
		 * Free blocks beyond EOF only for a regular file that has
		 * data (or cached/delalloc pages) and a read-in extent list,
		 * and that isn't marked for preallocation or append-only.
		 */
		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
		     ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
		       ip->i_delayed_blks > 0)) &&
		     (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
		    (!(ip->i_d.di_flags &
				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {

			/*
			 * If we can't get the iolock just skip truncating
			 * the blocks past EOF because we could deadlock
			 * with the mmap_sem otherwise.  We'll get another
			 * chance to drop them once the last reference to
			 * the inode is dropped, so we'll never leak blocks
			 * permanently.
			 */
			error = xfs_free_eofblocks(mp, ip,
						   XFS_FREE_EOF_TRYLOCK);
			if (error)
				return error;
		}
	}

	return 0;
}

/*
 * xfs_inactive
 *
 * This is called when the vnode reference count for the vnode
 * goes to zero.  If the file has been unlinked, then it must
 * now be truncated.  Also, we clear all of the read-ahead state
 * kept for the inode here since the file is now closed.
 */
int
xfs_inactive(
	xfs_inode_t	*ip)
{
	xfs_bmap_free_t	free_list;
	xfs_fsblock_t	first_block;
	int		committed;
	xfs_trans_t	*tp;
	xfs_mount_t	*mp;
	int		error;
	int		truncate;

	/*
	 * If the inode is already free, then there can be nothing
	 * to clean up here.
	 */
	if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) {
		ASSERT(ip->i_df.if_real_bytes == 0);
		ASSERT(ip->i_df.if_broot_bytes == 0);
		return VN_INACTIVE_CACHE;
	}

	/*
	 * Only do a truncate if it's a regular file with
	 * some actual space in it.  It's OK to look at the
	 * inode's fields without the lock because we're the
	 * only one with a reference to the inode.
	 */
	truncate = ((ip->i_d.di_nlink == 0) &&
	    ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
	     (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
	    ((ip->i_d.di_mode & S_IFMT) == S_IFREG));

	mp = ip->i_mount;

	error = 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		goto out;

	if (ip->i_d.di_nlink != 0) {
		/*
		 * Still linked: at most trim speculative blocks beyond EOF,
		 * then we're done -- the inode itself stays allocated.
		 */
		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
		     ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
		       ip->i_delayed_blks > 0)) &&
		     (ip->i_df.if_flags & XFS_IFEXTENTS) &&
		     (!(ip->i_d.di_flags &
				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
		      (ip->i_delayed_blks != 0)))) {
			error = xfs_free_eofblocks(mp, ip, 0);
			if (error)
				return VN_INACTIVE_CACHE;
		}
		goto out;
	}

	ASSERT(ip->i_d.di_nlink == 0);

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return VN_INACTIVE_CACHE;

	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	if (truncate) {
		/*
		 * Do the xfs_itruncate_start() call before
		 * reserving any log space because itruncate_start
		 * will call into the buffer cache and we can't
		 * do that within a transaction.
		 */
		xfs_ilock(ip, XFS_IOLOCK_EXCL);

		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
		if (error) {
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}

		error = xfs_trans_reserve(tp, 0,
					  XFS_ITRUNCATE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_ITRUNCATE_LOG_COUNT);
		if (error) {
			/* Don't call itruncate_cleanup */
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip);

		/*
		 * normally, we have to run xfs_itruncate_finish sync.
		 * But if filesystem is wsync and we're in the inactive
		 * path, then we know that nlink == 0, and that the
		 * xaction that made nlink == 0 is permanently committed
		 * since xfs_remove runs as a synchronous transaction.
		 */
		error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
				(!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));

		if (error) {
			xfs_trans_cancel(tp,
				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}
	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {

		/*
		 * If we get an error while cleaning up a
		 * symlink we bail out.
		 */
		error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ?
			xfs_inactive_symlink_rmt(ip, &tp) :
			xfs_inactive_symlink_local(ip, &tp);

		if (error) {
			ASSERT(tp == NULL);
			return VN_INACTIVE_CACHE;
		}

		xfs_trans_ijoin(tp, ip);
	} else {
		error = xfs_trans_reserve(tp, 0,
					  XFS_IFREE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_INACTIVE_LOG_COUNT);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
		xfs_trans_ijoin(tp, ip);
	}

	/*
	 * If there are attributes associated with the file
	 * then blow them away now.  The code calls a routine
	 * that recursively deconstructs the attribute fork.
	 * We need to just commit the current transaction
	 * because we can't use it for xfs_attr_inactive().
	 */
	if (ip->i_d.di_anextents > 0) {
		error = xfs_inactive_attrs(ip, &tp);
		/*
		 * If we got an error, the transaction is already
		 * cancelled, and the inode is unlocked. Just get out.
		 */
		if (error)
			return VN_INACTIVE_CACHE;
	} else if (ip->i_afp) {
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
	}

	/*
	 * Free the inode.
	 */
	xfs_bmap_init(&free_list, &first_block);
	error = xfs_ifree(tp, ip, &free_list);
	if (error) {
		/*
		 * If we fail to free the inode, shut down.  The cancel
		 * might do that, we need to make sure.  Otherwise the
		 * inode might be lost for a long time or forever.
		 */
		if (!XFS_FORCED_SHUTDOWN(mp)) {
			cmn_err(CE_NOTE,
		"xfs_inactive: xfs_ifree() returned an error = %d on %s",
				error, mp->m_fsname);
			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
		}
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
	} else {
		/*
		 * Credit the quota account(s). The inode is gone.
		 */
		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);

		/*
		 * Just ignore errors at this point.  There is nothing we can
		 * do except to try to keep going. Make sure it's not a silent
		 * error.
		 */
		error = xfs_bmap_finish(&tp, &free_list, &committed);
		if (error)
			xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
				"xfs_bmap_finish() returned error %d", error);
		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
		if (error)
			xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
				"xfs_trans_commit() returned error %d", error);
	}

	/*
	 * Release the dquots held by inode, if any.
	 */
	xfs_qm_dqdetach(ip);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);

 out:
	return VN_INACTIVE_CACHE;
}

/*
 * Looks up an inode from "name".  If ci_name is not NULL, then a CI match
 * is allowed, otherwise it has to be an exact match.  If a CI match is found,
 * ci_name->name will point to the actual name (caller must free) or
 * will be set to NULL if an exact match is found.
 */
int
xfs_lookup(
	xfs_inode_t		*dp,		/* directory to search */
	struct xfs_name		*name,		/* entry name to look up */
	xfs_inode_t		**ipp,		/* out: referenced inode, NULL on error */
	struct xfs_name		*ci_name)	/* out: CI-matched name (caller frees) */
{
	xfs_ino_t		inum;
	int			error;
	uint			lock_mode;

	trace_xfs_lookup(dp, name);

	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
		return XFS_ERROR(EIO);

	/*
	 * Resolve the name under a shared ilock, then drop the lock before
	 * grabbing the child inode itself.
	 */
	lock_mode = xfs_ilock_map_shared(dp);
	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
	xfs_iunlock_map_shared(dp, lock_mode);

	if (error)
		goto out;

	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
	if (error)
		goto out_free_name;

	return 0;

out_free_name:
	/* ci_name->name was allocated by the dir code on a CI match */
	if (ci_name)
		kmem_free(ci_name->name);
out:
	*ipp = NULL;
	return error;
}

/*
 * Create a new file or directory "name" in directory dp.  On success the
 * new, referenced and ILOCK_EXCL-locked inode is returned in *ipp.
 */
int
xfs_create(
	xfs_inode_t		*dp,
	struct xfs_name		*name,
	mode_t			mode,
	xfs_dev_t		rdev,
	xfs_inode_t		**ipp,
	cred_t			*credp)
{
	int			is_dir = S_ISDIR(mode);
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_inode	*ip = NULL;
	struct xfs_trans	*tp = NULL;
	int			error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	boolean_t		unlock_dp_on_error = B_FALSE;
	uint			cancel_flags;
	int			committed;
	xfs_prid_t		prid;
	struct xfs_dquot	*udqp = NULL;
	struct xfs_dquot	*gdqp = NULL;
	uint			resblks;
	uint			log_res;
	uint			log_count;

	trace_xfs_create(dp, name);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* Inherit the project ID from the parent if it asks for that. */
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = dp->i_d.di_projid;
	else
		prid = dfltprid;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
 */
	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		goto std_return;

	/*
	 * Directories and regular/special files use different space and log
	 * reservations and transaction types.
	 */
	if (is_dir) {
		rdev = 0;	/* device number is meaningless for a dir */
		resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
		log_res = XFS_MKDIR_LOG_RES(mp);
		log_count = XFS_MKDIR_LOG_COUNT;
		tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
	} else {
		resblks = XFS_CREATE_SPACE_RES(mp, name->len);
		log_res = XFS_CREATE_LOG_RES(mp);
		log_count = XFS_CREATE_LOG_COUNT;
		tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
	}

	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;

	/*
	 * Initially assume that the file does not exist and
	 * reserve the resources for that case.  If that is not
	 * the case we'll drop the one we have and get a more
	 * appropriate transaction later.
	 *
	 * Two-stage ENOSPC fallback: first flush delalloc space and retry
	 * the full reservation, then fall back to a zero-block
	 * "no-allocation" reservation.
	 */
	error = xfs_trans_reserve(tp, resblks, log_res, 0,
			XFS_TRANS_PERM_LOG_RES, log_count);
	if (error == ENOSPC) {
		/* flush outstanding delalloc blocks and retry */
		xfs_flush_inodes(dp);
		error = xfs_trans_reserve(tp, resblks, log_res, 0,
				XFS_TRANS_PERM_LOG_RES, log_count);
	}
	if (error == ENOSPC) {
		/* No space at all so try a "no-allocation" reservation */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, log_res, 0,
				XFS_TRANS_PERM_LOG_RES, log_count);
	}
	if (error) {
		cancel_flags = 0;
		goto out_trans_cancel;
	}

	/*
	 * NOTE(review): XFS_ILOCK_PARENT presumably selects the lockdep
	 * subclass used for parent directories -- confirm.
	 */
	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
	unlock_dp_on_error = B_TRUE;

	/*
	 * Check for directory link count overflow.
	 */
	if (is_dir && dp->i_d.di_nlink >= XFS_MAXLINK) {
		error = XFS_ERROR(EMLINK);
		goto out_trans_cancel;
	}

	xfs_bmap_init(&free_list, &first_block);

	/*
	 * Reserve disk quota and the inode.
	 */
	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto out_trans_cancel;

	error = xfs_dir_canenter(tp, dp, name, resblks);
	if (error)
		goto out_trans_cancel;

	/*
	 * A newly created regular or special file just has one directory
	 * entry pointing to them, but a directory also the "." entry
	 * pointing to itself.
	 */
	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, credp,
			prid, resblks > 0, &ip, &committed);
	if (error) {
		if (error == ENOSPC)
			goto out_trans_cancel;
		goto out_trans_abort;
	}

	/*
	 * At this point, we've gotten a newly allocated inode.
	 * It is locked (and joined to the transaction).
	 */
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	/*
	 * Now we join the directory inode to the transaction.  We do not do it
	 * earlier because xfs_dir_ialloc might commit the previous transaction
	 * (and release all the locks).  An error from here on will result in
	 * the transaction cancel unlocking dp so don't do it explicitly in the
	 * error path.
	 */
	xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
	unlock_dp_on_error = B_FALSE;

	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
				   &first_block, &free_list, resblks ?
					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
	if (error) {
		ASSERT(error != ENOSPC);
		goto out_trans_abort;
	}
	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	if (is_dir) {
		/* set up "." and ".." and bump the parent's link count */
		error = xfs_dir_init(tp, ip, dp);
		if (error)
			goto out_bmap_cancel;

		error = xfs_bumplink(tp, dp);
		if (error)
			goto out_bmap_cancel;
	}

	/*
	 * If this is a synchronous mount, make sure that the
	 * create transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
		xfs_trans_set_sync(tp);

	/*
	 * Attach the dquot(s) to the inodes and modify them incore.
	 * These ids of the inode couldn't have changed since the new
	 * inode has been locked ever since it was created.
	 */
	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);

	/*
	 * xfs_trans_commit normally decrements the vnode ref count
	 * when it unlocks the inode.  Since we want to return the
	 * vnode to the caller, we bump the vnode ref count now.
	 */
	IHOLD(ip);

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error)
		goto out_abort_rele;

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (error) {
		IRELE(ip);
		goto out_dqrele;
	}

	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	*ipp = ip;
	return 0;

 out_bmap_cancel:
	xfs_bmap_cancel(&free_list);
 out_trans_abort:
	cancel_flags |= XFS_TRANS_ABORT;
 out_trans_cancel:
	xfs_trans_cancel(tp, cancel_flags);
 out_dqrele:
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	if (unlock_dp_on_error)
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
 std_return:
	return error;

 out_abort_rele:
	/*
	 * Wait until after the current transaction is aborted to
	 * release the inode.  This prevents recursive transactions
	 * and deadlocks from xfs_inactive.
	 */
	xfs_bmap_cancel(&free_list);
	cancel_flags |= XFS_TRANS_ABORT;
	xfs_trans_cancel(tp, cancel_flags);
	IRELE(ip);
	unlock_dp_on_error = B_FALSE;
	goto out_dqrele;
}

/* Retry statistics for xfs_lock_inodes(), debug builds only. */
#ifdef DEBUG
int xfs_locked_n;
int xfs_small_retries;
int xfs_middle_retries;
int xfs_lots_retries;
int xfs_lock_delays;
#endif

/*
 * Bump the subclass so xfs_lock_inodes() acquires each lock with
 * a different value
 */
static inline int
xfs_lock_inumorder(int lock_mode, int subclass)
{
	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;

	return lock_mode;
}

/*
 * The following routine will lock n inodes in exclusive mode.
 * We assume the caller calls us with the inodes in i_ino order.
 *
 * We need to detect deadlock where an inode that we lock
 * is in the AIL and we start waiting for another inode that is locked
 * by a thread in a long running transaction (such as truncate). This can
 * result in deadlock since the long running trans might need to wait
 * for the inode we just locked in order to push the tail and free space
 * in the log.
 */
void
xfs_lock_inodes(
	xfs_inode_t	**ips,
	int		inodes,
	uint		lock_mode)
{
	int		attempts = 0, i, j, try_lock;
	xfs_log_item_t	*lp;

	ASSERT(ips && (inodes >= 2)); /* we need at least two */

	try_lock = 0;
	i = 0;

again:
	for (; i < inodes; i++) {
		ASSERT(ips[i]);

		if (i && (ips[i] == ips[i-1]))	/* Already locked */
			continue;

		/*
		 * If try_lock is not set yet, make sure all locked inodes
		 * are not in the AIL.
		 * If any are, set try_lock to be used later.
 */

		if (!try_lock) {
			for (j = (i - 1); j >= 0 && !try_lock; j--) {
				lp = (xfs_log_item_t *)ips[j]->i_itemp;
				if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
					try_lock++;
				}
			}
		}

		/*
		 * If any of the previous locks we have locked is in the AIL,
		 * we must TRY to get the second and subsequent locks. If
		 * we can't get any, we must release all we have
		 * and try again.
		 */

		if (try_lock) {
			/* try_lock must be 0 if i is 0. */
			/*
			 * try_lock means we have an inode locked
			 * that is in the AIL.
			 */
			ASSERT(i != 0);
			if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
				attempts++;

				/*
				 * Unlock all previous guys and try again.
				 * xfs_iunlock will try to push the tail
				 * if the inode is in the AIL.
				 */

				for(j = i - 1; j >= 0; j--) {

					/*
					 * Check to see if we've already
					 * unlocked this one.
					 * Not the first one going back,
					 * and the inode ptr is the same.
					 */
					if ((j != (i - 1)) && ips[j] ==
								ips[j+1])
						continue;

					xfs_iunlock(ips[j], lock_mode);
				}

				/* back off briefly once every 5 retries */
				if ((attempts % 5) == 0) {
					delay(1); /* Don't just spin the CPU */
#ifdef DEBUG
					xfs_lock_delays++;
#endif
				}
				i = 0;
				try_lock = 0;
				goto again;
			}
		} else {
			xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
		}
	}

#ifdef DEBUG
	if (attempts) {
		if (attempts < 5) xfs_small_retries++;
		else if (attempts < 100) xfs_middle_retries++;
		else xfs_lots_retries++;
	} else {
		xfs_locked_n++;
	}
#endif
}

/*
 * xfs_lock_two_inodes() can only be used to lock one type of lock
 * at a time - the iolock or the ilock, but not both at once. If
 * we lock both at once, lockdep will report false positives saying
 * we have violated locking orders.
 */
void
xfs_lock_two_inodes(
	xfs_inode_t		*ip0,
	xfs_inode_t		*ip1,
	uint			lock_mode)
{
	xfs_inode_t		*temp;
	int			attempts = 0;
	xfs_log_item_t		*lp;

	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
		ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
	ASSERT(ip0->i_ino != ip1->i_ino);

	/* always lock in ascending inode-number order */
	if (ip0->i_ino > ip1->i_ino) {
		temp = ip0;
		ip0 = ip1;
		ip1 = temp;
	}

 again:
	xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));

	/*
	 * If the first lock we have locked is in the AIL, we must TRY to get
	 * the second lock. If we can't get it, we must release the first one
	 * and try again.
	 */
	lp = (xfs_log_item_t *)ip0->i_itemp;
	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
		if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
			xfs_iunlock(ip0, lock_mode);
			if ((++attempts % 5) == 0)
				delay(1); /* Don't just spin the CPU */
			goto again;
		}
	} else {
		xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
	}
}

/*
 * Remove the entry "name" from directory dp.  ip is the inode that
 * the entry currently points at.
 */
int
xfs_remove(
	xfs_inode_t		*dp,
	struct xfs_name		*name,
	xfs_inode_t		*ip)
{
	xfs_mount_t		*mp = dp->i_mount;
	xfs_trans_t		*tp = NULL;
	int			is_dir = S_ISDIR(ip->i_d.di_mode);
	int			error = 0;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	int			cancel_flags;
	int			committed;
	int			link_zero;
	uint			resblks;
	uint			log_count;

	trace_xfs_remove(dp, name);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* Attach dquots to both inodes before starting the transaction. */
	error = xfs_qm_dqattach(dp, 0);
	if (error)
		goto std_return;

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		goto std_return;

	if (is_dir) {
		tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
		log_count = XFS_DEFAULT_LOG_COUNT;
	} else {
		tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
		log_count = XFS_REMOVE_LOG_COUNT;
	}

	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;

	/*
	 * We try to get the real space reservation first,
	 * allowing for directory btree deletion(s) implying
	 * possible bmap insert(s).  If we can't get the space
	 * reservation then we use 0 instead, and avoid the bmap
	 * btree insert(s) in the directory code by, if the bmap
	 * insert tries to happen, instead trimming the LAST
	 * block from the directory.
	 */
	resblks = XFS_REMOVE_SPACE_RES(mp);
	error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
				  XFS_TRANS_PERM_LOG_RES, log_count);
	if (error == ENOSPC) {
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
					  XFS_TRANS_PERM_LOG_RES, log_count);
	}
	if (error) {
		ASSERT(error != ENOSPC);
		cancel_flags = 0;
		goto out_trans_cancel;
	}

	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);

	xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
	xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);

	/*
	 * If we're removing a directory perform some additional validation.
	 */
	if (is_dir) {
		/*
		 * nlink == 2 presumably means the directory holds only "."
		 * plus the parent's entry, i.e. it is empty -- the explicit
		 * xfs_dir_isempty() check below backs this up.
		 */
		ASSERT(ip->i_d.di_nlink >= 2);
		if (ip->i_d.di_nlink != 2) {
			error = XFS_ERROR(ENOTEMPTY);
			goto out_trans_cancel;
		}
		if (!xfs_dir_isempty(ip)) {
			error = XFS_ERROR(ENOTEMPTY);
			goto out_trans_cancel;
		}
	}

	xfs_bmap_init(&free_list, &first_block);
	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
					&first_block, &free_list, resblks);
	if (error) {
		ASSERT(error != ENOENT);
		goto out_bmap_cancel;
	}
	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

	if (is_dir) {
		/*
		 * Drop the link from ip's "..".
		 */
		error = xfs_droplink(tp, dp);
		if (error)
			goto out_bmap_cancel;

		/*
		 * Drop the "." link from ip to self.
		 */
		error = xfs_droplink(tp, ip);
		if (error)
			goto out_bmap_cancel;
	} else {
		/*
		 * When removing a non-directory we need to log the parent
		 * inode here.  For a directory this is done implicitly
		 * by the xfs_droplink call for the ".." entry.
		 */
		xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
	}

	/*
	 * Drop the link from dp to ip.
	 */
	error = xfs_droplink(tp, ip);
	if (error)
		goto out_bmap_cancel;

	/*
	 * Determine if this is the last link while
	 * we are in the transaction.
	 */
	link_zero = (ip->i_d.di_nlink == 0);

	/*
	 * If this is a synchronous mount, make sure that the
	 * remove transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
		xfs_trans_set_sync(tp);

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error)
		goto out_bmap_cancel;

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (error)
		goto std_return;

	/*
	 * If we are using filestreams, kill the stream association.
	 * If the file is still open it may get a new one but that
	 * will get killed on last close in xfs_close() so we don't
	 * have to worry about that.
	 */
	if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
		xfs_filestream_deassociate(ip);

	return 0;

 out_bmap_cancel:
	xfs_bmap_cancel(&free_list);
	cancel_flags |= XFS_TRANS_ABORT;
 out_trans_cancel:
	xfs_trans_cancel(tp, cancel_flags);
 std_return:
	return error;
}

/*
 * Create a new hard link named target_name in directory tdp,
 * pointing at the existing (non-directory) inode sip.
 */
int
xfs_link(
	xfs_inode_t		*tdp,
	xfs_inode_t		*sip,
	struct xfs_name		*target_name)
{
	xfs_mount_t		*mp = tdp->i_mount;
	xfs_trans_t		*tp;
	int			error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	int			cancel_flags;
	int			committed;
	int			resblks;

	trace_xfs_link(tdp, target_name);

	/* hard links to directories are not allowed */
	ASSERT(!S_ISDIR(sip->i_d.di_mode));

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	error = xfs_qm_dqattach(sip, 0);
	if (error)
		goto std_return;

	error = xfs_qm_dqattach(tdp, 0);
	if (error)
		goto std_return;

	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
	error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
	if (error == ENOSPC) {
		/* retry with a zero-block reservation */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
	}
	if (error) {
		cancel_flags = 0;
		goto error_return;
	}

	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);

	xfs_trans_ijoin_ref(tp, sip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin_ref(tp, tdp, XFS_ILOCK_EXCL);

	/*
	 * If the source has too many links, we can't make any more to it.
	 */
	if (sip->i_d.di_nlink >= XFS_MAXLINK) {
		error = XFS_ERROR(EMLINK);
		goto error_return;
	}

	/*
	 * If we are using project inheritance, we only allow hard link
	 * creation in our tree when the project IDs are the same; else
	 * the tree quota mechanism could be circumvented.
	 */
	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
		     (tdp->i_d.di_projid != sip->i_d.di_projid))) {
		error = XFS_ERROR(EXDEV);
		goto error_return;
	}

	error = xfs_dir_canenter(tp, tdp, target_name, resblks);
	if (error)
		goto error_return;

	xfs_bmap_init(&free_list, &first_block);

	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
					&first_block, &free_list, resblks);
	if (error)
		goto abort_return;
	xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);

	error = xfs_bumplink(tp, sip);
	if (error)
		goto abort_return;

	/*
	 * If this is a synchronous mount, make sure that the
	 * link transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_bmap_finish (&tp, &free_list, &committed);
	if (error) {
		xfs_bmap_cancel(&free_list);
		goto abort_return;
	}

	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);

 abort_return:
	cancel_flags |= XFS_TRANS_ABORT;
 error_return:
	xfs_trans_cancel(tp, cancel_flags);
 std_return:
	return error;
}

/*
 * Create the symbolic link link_name in directory dp with target_path as
 * its contents.  The link data is stored inline in the inode when it fits,
 * otherwise in separately allocated blocks.  On success the new, referenced
 * inode is returned in *ipp.
 */
int
xfs_symlink(
	xfs_inode_t		*dp,
	struct xfs_name		*link_name,
	const char		*target_path,
	mode_t			mode,
	xfs_inode_t		**ipp,
	cred_t			*credp)
{
	xfs_mount_t		*mp = dp->i_mount;
	xfs_trans_t		*tp;
	xfs_inode_t		*ip;
	int			error;
	int			pathlen;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	boolean_t		unlock_dp_on_error = B_FALSE;
	uint			cancel_flags;
	int			committed;
	xfs_fileoff_t		first_fsb;
	xfs_filblks_t		fs_blocks;
	int			nmaps;
	xfs_bmbt_irec_t		mval[SYMLINK_MAPS];
	xfs_daddr_t		d;
	const char		*cur_chunk;
	int			byte_cnt;
	int			n;
	xfs_buf_t		*bp;
	xfs_prid_t		prid;
	struct xfs_dquot	*udqp, *gdqp;
	uint			resblks;

	*ipp = NULL;
	error = 0;
	ip = NULL;
	tp = NULL;

	trace_xfs_symlink(dp, link_name);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/*
	 * Check component lengths of the target path name.
	 */
	pathlen = strlen(target_path);
	if (pathlen >= MAXPATHLEN)      /* total string too long */
		return XFS_ERROR(ENAMETOOLONG);

	udqp = gdqp = NULL;
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = dp->i_d.di_projid;
	else
		prid = (xfs_prid_t)dfltprid;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		goto std_return;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	/*
	 * The symlink will fit into the inode data fork?
	 * There can't be any attributes so we get the whole variable part.
	 */
	if (pathlen <= XFS_LITINO(mp))
		fs_blocks = 0;
	else
		fs_blocks = XFS_B_TO_FSB(mp, pathlen);
	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
	error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
	if (error == ENOSPC && fs_blocks == 0) {
		/* inline symlink needs no data blocks; retry without them */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
	}
	if (error) {
		cancel_flags = 0;
		goto error_return;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
	unlock_dp_on_error = B_TRUE;

	/*
	 * Check whether the directory allows new symlinks or not.
	 */
	if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
		error = XFS_ERROR(EPERM);
		goto error_return;
	}

	/*
	 * Reserve disk quota : blocks and inode.
	 */
	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto error_return;

	/*
	 * Check for ability to enter directory entry, if no space reserved.
	 */
	error = xfs_dir_canenter(tp, dp, link_name, resblks);
	if (error)
		goto error_return;
	/*
	 * Initialize the bmap freelist prior to calling either
	 * bmapi or the directory create code.
	 */
	xfs_bmap_init(&free_list, &first_block);

	/*
	 * Allocate an inode for the symlink.
	 */
	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT),
			       1, 0, credp, prid, resblks > 0, &ip, NULL);
	if (error) {
		if (error == ENOSPC)
			goto error_return;
		goto error1;
	}

	/*
	 * An error after we've joined dp to the transaction will result in the
	 * transaction cancel unlocking dp so don't do it explicitly in the
	 * error path.
	 */
	xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
	unlock_dp_on_error = B_FALSE;

	/*
	 * Also attach the dquot(s) to it, if applicable.
	 */
	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);

	if (resblks)
		resblks -= XFS_IALLOC_SPACE_RES(mp);
	/*
	 * If the symlink will fit into the inode, write it inline.
	 */
	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
		ip->i_d.di_size = pathlen;

		/*
		 * The inode was initially created in extent format.
		 */
		ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
		ip->i_df.if_flags |= XFS_IFINLINE;

		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);

	} else {
		/* remote symlink: allocate blocks and copy the path out */
		first_fsb = 0;
		nmaps = SYMLINK_MAPS;

		error = xfs_bmapi(tp, ip, first_fsb, fs_blocks,
				  XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
				  &first_block, resblks, mval, &nmaps,
				  &free_list);
		if (error) {
			goto error1;
		}

		if (resblks)
			resblks -= fs_blocks;
		ip->i_d.di_size = pathlen;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

		/* copy the target path chunk-by-chunk into each mapping */
		cur_chunk = target_path;
		for (n = 0; n < nmaps; n++) {
			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
					       BTOBB(byte_cnt), 0);
			ASSERT(bp && !XFS_BUF_GETERROR(bp));
			if (pathlen < byte_cnt) {
				byte_cnt = pathlen;
			}
			pathlen -= byte_cnt;

			memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt);
			cur_chunk += byte_cnt;

			xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
		}
	}

	/*
	 * Create the directory entry for the symlink.
	 */
	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
				   &first_block, &free_list, resblks);
	if (error)
		goto error1;
	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	/*
	 * If this is a synchronous mount, make sure that the
	 * symlink transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	/*
	 * xfs_trans_commit normally decrements the vnode ref count
	 * when it unlocks the inode.  Since we want to return the
	 * vnode to the caller, we bump the vnode ref count now.
	 */
	IHOLD(ip);

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error) {
		goto error2;
	}
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	*ipp = ip;
	return 0;

 error2:
	IRELE(ip);
 error1:
	xfs_bmap_cancel(&free_list);
	cancel_flags |= XFS_TRANS_ABORT;
 error_return:
	xfs_trans_cancel(tp, cancel_flags);
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	if (unlock_dp_on_error)
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
 std_return:
	return error;
}

/*
 * Set the DMAPI event mask and state on ip.  Requires CAP_SYS_ADMIN;
 * the update is made in a small transaction of its own.
 */
int
xfs_set_dmattrs(
	xfs_inode_t	*ip,
	u_int		evmask,
	u_int16_t	state)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_trans_t	*tp;
	int		error;

	if (!capable(CAP_SYS_ADMIN))
		return XFS_ERROR(EPERM);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);

	ip->i_d.di_dmevmask = evmask;
	ip->i_d.di_dmstate = state;

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	error = xfs_trans_commit(tp, 0);

	return error;
}

/*
 * xfs_alloc_file_space()
 *      This routine allocates disk space for the given file.
 *
 *	If alloc_type == 0, this request is for an ALLOCSP type
 *	request which will change the file size.  In this case, no
 *	DMAPI event will be generated by the call.  A TRUNCATE event
 *	will be generated later by xfs_setattr.
 *
 *	If alloc_type != 0, this request is for a RESVSP type
 *	request, and a DMAPI DM_EVENT_WRITE will be generated if the
 *	lower block boundary byte address is less than the file's
 *	length.
 *
 * RETURNS:
 *       0 on success
 *      errno on error
 *
 */
STATIC int
xfs_alloc_file_space(
	xfs_inode_t		*ip,
	xfs_off_t		offset,
	xfs_off_t		len,
	int			alloc_type,
	int			attr_flags)	/* NOTE(review): not referenced in this body */
{
	xfs_mount_t		*mp = ip->i_mount;
	xfs_off_t		count;
	xfs_filblks_t		allocated_fsb;
	xfs_filblks_t		allocatesize_fsb;
	xfs_extlen_t		extsz, temp;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fsblock_t		firstfsb;
	int			nimaps;
	int			bmapi_flag;
	int			quota_flag;
	int			rt;
	xfs_trans_t		*tp;
	xfs_bmbt_irec_t		imaps[1], *imapp;
	xfs_bmap_free_t		free_list;
	uint			qblocks, resblks, resrtextents;
	int			committed;
	int			error;

	trace_xfs_alloc_file_space(ip);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return error;

	if (len <= 0)
		return XFS_ERROR(EINVAL);

	rt = XFS_IS_REALTIME_INODE(ip);
	extsz = xfs_get_extsz_hint(ip);

	count = len;
	imapp = &imaps[0];
	nimaps = 1;
	/* alloc_type != 0 means a RESVSP-style unwritten preallocation */
	bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
	allocatesize_fsb = XFS_B_TO_FSB(mp, count);

	/*
	 * Allocate file space until done or until there is an error
	 */
	while (allocatesize_fsb && !error) {
		xfs_fileoff_t	s, e;

		/*
		 * Determine space reservations for data/realtime.
		 *
		 * With an extent size hint, widen [s, e) to the enclosing
		 * extsz-aligned region so the reservation covers the
		 * aligned allocation.
		 */
		if (unlikely(extsz)) {
			s = startoffset_fsb;
			do_div(s, extsz);
			s *= extsz;
			e = startoffset_fsb + allocatesize_fsb;
			if ((temp = do_mod(startoffset_fsb, extsz)))
				e += temp;
			if ((temp = do_mod(e, extsz)))
				e += extsz - temp;
		} else {
			s = 0;
			e = allocatesize_fsb;
		}

		/*
		 * The transaction reservation is limited to a 32-bit block
		 * count, hence we need to limit the number of blocks we are
		 * trying to reserve to avoid an overflow. We can't allocate
		 * more than @nimaps extents, and an extent is limited on disk
		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
		 */
		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
		if (unlikely(rt)) {
			resrtextents = qblocks = resblks;
			resrtextents /= mp->m_sb.sb_rextsize;
			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
			quota_flag = XFS_QMOPT_RES_RTBLKS;
		} else {
			resrtextents = 0;
			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
			quota_flag = XFS_QMOPT_RES_REGBLKS;
		}

		/*
		 * Allocate and setup the transaction.
		 */
		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
		error = xfs_trans_reserve(tp, resblks,
					  XFS_WRITE_LOG_RES(mp), resrtextents,
					  XFS_TRANS_PERM_LOG_RES,
					  XFS_WRITE_LOG_COUNT);
		/*
		 * Check for running out of space
		 */
		if (error) {
			/*
			 * Free the transaction structure.
			 */
			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			break;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
						      0, quota_flag);
		if (error)
			goto error1;

		xfs_trans_ijoin(tp, ip);

		/*
		 * Issue the xfs_bmapi() call to allocate the blocks
		 */
		xfs_bmap_init(&free_list, &firstfsb);
		error = xfs_bmapi(tp, ip, startoffset_fsb,
				  allocatesize_fsb, bmapi_flag,
				  &firstfsb, 0, imapp, &nimaps,
				  &free_list);
		if (error) {
			goto error0;
		}

		/*
		 * Complete the transaction
		 */
		error = xfs_bmap_finish(&tp, &free_list, &committed);
		if (error) {
			goto error0;
		}

		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error) {
			break;
		}

		allocated_fsb = imapp->br_blockcount;

		if (nimaps == 0) {
			error = XFS_ERROR(ENOSPC);
			break;
		}

		/* advance past what this pass actually mapped */
		startoffset_fsb += allocated_fsb;
		allocatesize_fsb -= allocated_fsb;
	}

	return error;

error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
	xfs_bmap_cancel(&free_list);
	xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);

error1:	/* Just cancel transaction */
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * Zero file bytes between startoff and endoff inclusive.
 * The iolock is held exclusive and no blocks are buffered.
 *
 * This function is used by xfs_free_file_space() to zero
 * partial blocks when the range to free is not block aligned.
 * When unreserving space with boundaries that are not block
 * aligned we round up the start and round down the end
 * boundaries and then use this function to zero the parts of
 * the blocks that got dropped during the rounding.
 *
 * Returns 0 on success, or a positive errno-style XFS error code.
 */
STATIC int
xfs_zero_remaining_bytes(
	xfs_inode_t		*ip,
	xfs_off_t		startoff,
	xfs_off_t		endoff)
{
	xfs_bmbt_irec_t		imap;
	xfs_fileoff_t		offset_fsb;
	xfs_off_t		lastoffset;
	xfs_off_t		offset;
	xfs_buf_t		*bp;
	xfs_mount_t		*mp = ip->i_mount;
	int			nimap;
	int			error = 0;

	/*
	 * Avoid doing I/O beyond eof - it's not necessary
	 * since nothing can read beyond eof.  The space will
	 * be zeroed when the file is extended anyway.
	 */
	if (startoff >= ip->i_size)
		return 0;

	if (endoff > ip->i_size)
		endoff = ip->i_size;

	/*
	 * One block-sized scratch buffer, targeted at the realtime
	 * device for realtime inodes, the data device otherwise.
	 * It is reused for every block touched in the loop below.
	 */
	bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize,
				XFS_IS_REALTIME_INODE(ip) ?
				mp->m_rtdev_targp : mp->m_ddev_targp);
	if (!bp)
		return XFS_ERROR(ENOMEM);

	/* Walk the range one filesystem block at a time. */
	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
		offset_fsb = XFS_B_TO_FSBT(mp, offset);
		nimap = 1;
		/* Look up (no transaction) the mapping for this block. */
		error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0,
			NULL, 0, &imap, &nimap, NULL);
		if (error || nimap < 1)
			break;
		ASSERT(imap.br_blockcount >= 1);
		ASSERT(imap.br_startoff == offset_fsb);
		/* Last byte of this block, clamped to the caller's range. */
		lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
		if (lastoffset > endoff)
			lastoffset = endoff;
		/* Holes and unwritten extents already read back as zeroes. */
		if (imap.br_startblock == HOLESTARTBLOCK)
			continue;
		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
		if (imap.br_state == XFS_EXT_UNWRITTEN)
			continue;
		/*
		 * Read-modify-write cycle: read the block, zero the
		 * sub-range, write it back.  Synchronous on purpose.
		 */
		XFS_BUF_UNDONE(bp);
		XFS_BUF_UNWRITE(bp);
		XFS_BUF_READ(bp);
		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
		xfsbdstrat(mp, bp);
		error = xfs_iowait(bp);
		if (error) {
			xfs_ioerror_alert("xfs_zero_remaining_bytes(read)",
					  mp, bp, XFS_BUF_ADDR(bp));
			break;
		}
		memset(XFS_BUF_PTR(bp) +
			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
		      0, lastoffset - offset + 1);
		XFS_BUF_UNDONE(bp);
		XFS_BUF_UNREAD(bp);
		XFS_BUF_WRITE(bp);
		xfsbdstrat(mp, bp);
		error = xfs_iowait(bp);
		if (error) {
			xfs_ioerror_alert("xfs_zero_remaining_bytes(write)",
					  mp, bp, XFS_BUF_ADDR(bp));
			break;
		}
	}
	xfs_buf_free(bp);
	return error;
}

/*
 * xfs_free_file_space()
 * This routine frees disk space for the given file.
 *
 * This routine is only called by xfs_change_file_space
 * for an UNRESVSP type call.
 *
 * The range [offset, offset + len) is punched out: partial edge
 * blocks are zeroed on disk, whole blocks are unmapped via
 * xfs_bunmapi() in one or more transactions.  attr_flags may carry
 * XFS_ATTR_NOLOCK when the caller already holds the iolock.
 *
 * RETURNS:
 *       0 on success
 *       errno on error
 *
 */
STATIC int
xfs_free_file_space(
	xfs_inode_t		*ip,
	xfs_off_t		offset,
	xfs_off_t		len,
	int			attr_flags)
{
	int			committed;
	int			done;
	xfs_fileoff_t		endoffset_fsb;
	int			error;
	xfs_fsblock_t		firstfsb;
	xfs_bmap_free_t		free_list;
	xfs_bmbt_irec_t		imap;
	xfs_off_t		ioffset;
	xfs_extlen_t		mod=0;
	xfs_mount_t		*mp;
	int			nimap;
	uint			resblks;
	uint			rounding;
	int			rt;
	xfs_fileoff_t		startoffset_fsb;
	xfs_trans_t		*tp;
	int			need_iolock = 1;

	mp = ip->i_mount;

	trace_xfs_free_file_space(ip);

	/* make sure dquots are attached before reserving quota below */
	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return error;

	error = 0;
	if (len <= 0)	/* if nothing being freed */
		return error;
	rt = XFS_IS_REALTIME_INODE(ip);
	/* round start up and end down to whole filesystem blocks */
	startoffset_fsb	= XFS_B_TO_FSB(mp, offset);
	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);

	if (attr_flags & XFS_ATTR_NOLOCK)
		need_iolock = 0;
	if (need_iolock) {
		xfs_ilock(ip, XFS_IOLOCK_EXCL);
		/* wait for the completion of any pending DIOs */
		xfs_ioend_wait(ip);
	}

	/* flush/invalidate cached pages from the larger of block/page size */
	rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
	ioffset = offset & ~(rounding - 1);

	if (VN_CACHED(VFS_I(ip)) != 0) {
		error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED);
		if (error)
			goto out_unlock_iolock;
	}

	/*
	 * Need to zero the stuff we're not freeing, on disk.
	 * If it's a realtime file & can't use unwritten extents then we
	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
	 * will take care of it for us.
	 */
	if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
		/* align the start up to a realtime extent boundary */
		nimap = 1;
		error = xfs_bmapi(NULL, ip, startoffset_fsb,
			1, 0, NULL, 0, &imap, &nimap, NULL);
		if (error)
			goto out_unlock_iolock;
		ASSERT(nimap == 0 || nimap == 1);
		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
			xfs_daddr_t	block;

			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
			block = imap.br_startblock;
			mod = do_div(block, mp->m_sb.sb_rextsize);
			if (mod)
				startoffset_fsb += mp->m_sb.sb_rextsize - mod;
		}
		/* align the end down to a realtime extent boundary */
		nimap = 1;
		error = xfs_bmapi(NULL, ip, endoffset_fsb - 1,
			1, 0, NULL, 0, &imap, &nimap, NULL);
		if (error)
			goto out_unlock_iolock;
		ASSERT(nimap == 0 || nimap == 1);
		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
			/*
			 * NOTE(review): "mod" here still holds the start
			 * offset's remainder from the do_div() above rather
			 * than being recomputed for the end block — looks
			 * suspicious; confirm against the rt-extent
			 * alignment intent before relying on this path.
			 */
			mod++;
			if (mod && (mod != mp->m_sb.sb_rextsize))
				endoffset_fsb -= mod;
		}
	}
	if ((done = (endoffset_fsb <= startoffset_fsb)))
		/*
		 * One contiguous piece to clear
		 */
		error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
	else {
		/*
		 * Some full blocks, possibly two pieces to clear
		 */
		if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
			error = xfs_zero_remaining_bytes(ip, offset,
				XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
		if (!error &&
		    XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
			error = xfs_zero_remaining_bytes(ip,
				XFS_FSB_TO_B(mp, endoffset_fsb),
				offset + len - 1);
	}

	/*
	 * free file space until done or until there is an error
	 */
	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
	while (!error && !done) {

		/*
		 * allocate and setup the transaction. Allow this
		 * transaction to dip into the reserve blocks to ensure
		 * the freeing of the space succeeds at ENOSPC.
		 */
		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
		tp->t_flags |= XFS_TRANS_RESERVE;
		error = xfs_trans_reserve(tp,
					  resblks,
					  XFS_WRITE_LOG_RES(mp),
					  0,
					  XFS_TRANS_PERM_LOG_RES,
					  XFS_WRITE_LOG_COUNT);

		/*
		 * check for running out of space
		 */
		if (error) {
			/*
			 * Free the transaction structure.
			 */
			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			break;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_trans_reserve_quota(tp, mp,
				ip->i_udquot, ip->i_gdquot,
				resblks, 0, XFS_QMOPT_RES_REGBLKS);
		if (error)
			goto error1;

		xfs_trans_ijoin(tp, ip);

		/*
		 * issue the bunmapi() call to free the blocks
		 */
		xfs_bmap_init(&free_list, &firstfsb);
		error = xfs_bunmapi(tp, ip, startoffset_fsb,
				  endoffset_fsb - startoffset_fsb,
				  0, 2, &firstfsb, &free_list, &done);
		if (error) {
			goto error0;
		}

		/*
		 * complete the transaction
		 */
		error = xfs_bmap_finish(&tp, &free_list, &committed);
		if (error) {
			goto error0;
		}

		/* commit drops the ilock; the loop re-takes it if !done */
		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}

 out_unlock_iolock:
	if (need_iolock)
		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return error;

 error0:
	xfs_bmap_cancel(&free_list);
 error1:
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
	xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
		    XFS_ILOCK_EXCL);
	return error;
}

/*
 * xfs_change_file_space()
 * This routine allocates or frees disk space for the given file.
 * The user specified parameters are checked for alignment and size
 * limitations.
 *
 * ip         -- the inode being operated on
 * cmd        -- XFS_IOC_RESVSP/UNRESVSP/ALLOCSP/FREESP (and 64-bit forms)
 * bf         -- flock64-style range (l_whence/l_start/l_len)
 * offset     -- current file offset, used to resolve SEEK_CUR
 * attr_flags -- XFS_ATTR_* modifiers (e.g. DMI, NOLOCK)
 *
 * RETURNS:
 *       0 on success
 *       errno on error
 *
 */
int
xfs_change_file_space(
	xfs_inode_t	*ip,
	int		cmd,
	xfs_flock64_t	*bf,
	xfs_off_t	offset,
	int		attr_flags)
{
	xfs_mount_t	*mp = ip->i_mount;
	int		clrprealloc;
	int		error;
	xfs_fsize_t	fsize;
	int		setprealloc;
	xfs_off_t	startoffset;
	xfs_off_t	llen;
	xfs_trans_t	*tp;
	struct iattr	iattr;

	/* space preallocation only makes sense on regular files */
	if (!S_ISREG(ip->i_d.di_mode))
		return XFS_ERROR(EINVAL);

	/* normalize l_start to an absolute file offset */
	switch (bf->l_whence) {
	case 0: /*SEEK_SET*/
		break;
	case 1: /*SEEK_CUR*/
		bf->l_start += offset;
		break;
	case 2: /*SEEK_END*/
		bf->l_start += ip->i_size;
		break;
	default:
		return XFS_ERROR(EINVAL);
	}

	/* length of 0 means "to the end"; use len-1 so range checks work */
	llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len;

	if (   (bf->l_start < 0)
	    || (bf->l_start > XFS_MAXIOFFSET(mp))
	    || (bf->l_start + llen < 0)
	    || (bf->l_start + llen > XFS_MAXIOFFSET(mp)))
		return XFS_ERROR(EINVAL);

	bf->l_whence = 0;

	startoffset = bf->l_start;
	fsize = ip->i_size;

	/*
	 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve
	 * file space.
	 * These calls do NOT zero the data space allocated to the file,
	 * nor do they change the file size.
	 *
	 * XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file
	 * space.
	 * These calls cause the new file data to be zeroed and the file
	 * size to be changed.
	 */
	setprealloc = clrprealloc = 0;

	switch (cmd) {
	case XFS_IOC_RESVSP:
	case XFS_IOC_RESVSP64:
		/* reserve unwritten extents; size is unchanged */
		error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
								1, attr_flags);
		if (error)
			return error;
		setprealloc = 1;
		break;

	case XFS_IOC_UNRESVSP:
	case XFS_IOC_UNRESVSP64:
		/* punch a hole in the given range; size is unchanged */
		if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
								attr_flags)))
			return error;
		break;

	case XFS_IOC_ALLOCSP:
	case XFS_IOC_ALLOCSP64:
	case XFS_IOC_FREESP:
	case XFS_IOC_FREESP64:
		/*
		 * Growing the file: allocate real (zeroed) blocks for
		 * the gap between the old EOF and the new one, then let
		 * xfs_setattr() move the size.  Shrinking is handled
		 * entirely by the ATTR_SIZE setattr below.
		 */
		if (startoffset > fsize) {
			error = xfs_alloc_file_space(ip, fsize,
					startoffset - fsize, 0, attr_flags);
			if (error)
				break;
		}

		iattr.ia_valid = ATTR_SIZE;
		iattr.ia_size = startoffset;

		error = xfs_setattr(ip, &iattr, attr_flags);

		if (error)
			return error;

		clrprealloc = 1;
		break;

	default:
		ASSERT(0);
		return XFS_ERROR(EINVAL);
	}

	/*
	 * update the inode timestamp, mode, and prealloc flag bits
	 */
	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);

	if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp),
				      0, 0, 0))) {
		/* ASSERT(0); */
		xfs_trans_cancel(tp, 0);
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);

	xfs_trans_ijoin(tp, ip);

	if ((attr_flags & XFS_ATTR_DMI) == 0) {
		/* non-DMAPI callers drop setuid/setgid like a write would */
		ip->i_d.di_mode &= ~S_ISUID;

		/*
		 * Note that we don't have to worry about mandatory
		 * file locking being disabled here because we only
		 * clear the S_ISGID bit if the Group execute bit is
		 * on, but if it was on then mandatory locking wouldn't
		 * have been enabled.
		 */
		if (ip->i_d.di_mode & S_IXGRP)
			ip->i_d.di_mode &= ~S_ISGID;

		xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	}
	if (setprealloc)
		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
	else if (clrprealloc)
		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/* synchronous commit: the ioctl must be durable on return */
	xfs_trans_set_sync(tp);

	error = xfs_trans_commit(tp, 0);

	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	return error;
}