1/* 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18#include "xfs.h" 19#include "xfs_fs.h" 20#include "xfs_types.h" 21#include "xfs_bit.h" 22#include "xfs_log.h" 23#include "xfs_inum.h" 24#include "xfs_trans.h" 25#include "xfs_sb.h" 26#include "xfs_ag.h" 27#include "xfs_dir.h" 28#include "xfs_dir2.h" 29#include "xfs_dmapi.h" 30#include "xfs_mount.h" 31#include "xfs_bmap_btree.h" 32#include "xfs_dir_sf.h" 33#include "xfs_dir2_sf.h" 34#include "xfs_attr_sf.h" 35#include "xfs_dinode.h" 36#include "xfs_inode.h" 37#include "xfs_inode_item.h" 38#include "xfs_bmap.h" 39#include "xfs_error.h" 40#include "xfs_quota.h" 41#include "xfs_rw.h" 42#include "xfs_itable.h" 43#include "xfs_utils.h" 44 45/* 46 * xfs_get_dir_entry is used to get a reference to an inode given 47 * its parent directory inode and the name of the file. It does 48 * not lock the child inode, and it unlocks the directory before 49 * returning. The directory's generation number is returned for 50 * use by a later call to xfs_lock_dir_and_entry. 51 */ 52int 53xfs_get_dir_entry( 54 vname_t *dentry, 55 xfs_inode_t **ipp) 56{ 57 xfs_vnode_t *vp; 58 59 vp = VNAME_TO_VNODE(dentry); 60 61 *ipp = xfs_vtoi(vp); 62 if (!*ipp) 63 return XFS_ERROR(ENOENT); 64 VN_HOLD(vp); 65 return 0; 66} 67 68int 69xfs_dir_lookup_int( 70 bhv_desc_t *dir_bdp, 71 uint lock_mode, 72 vname_t *dentry, 73 xfs_ino_t *inum, 74 xfs_inode_t **ipp) 75{ 76 xfs_vnode_t *dir_vp; 77 xfs_inode_t *dp; 78 int error; 79 80 dir_vp = BHV_TO_VNODE(dir_bdp); 81 vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); 82 83 dp = XFS_BHVTOI(dir_bdp); 84 85 error = XFS_DIR_LOOKUP(dp->i_mount, NULL, dp, 86 VNAME(dentry), VNAMELEN(dentry), inum); 87 if (!error) { 88 /* 89 * Unlock the directory. We do this because we can't 90 * hold the directory lock while doing the vn_get() 91 * in xfs_iget(). Doing so could cause us to hold 92 * a lock while waiting for the inode to finish 93 * being inactive while it's waiting for a log 94 * reservation in the inactive routine. 95 */ 96 xfs_iunlock(dp, lock_mode); 97 error = xfs_iget(dp->i_mount, NULL, *inum, 0, 0, ipp, 0); 98 xfs_ilock(dp, lock_mode); 99 100 if (error) { 101 *ipp = NULL; 102 } else if ((*ipp)->i_d.di_mode == 0) { 103 /* 104 * The inode has been freed. Something is 105 * wrong so just get out of here. 106 */ 107 xfs_iunlock(dp, lock_mode); 108 xfs_iput_new(*ipp, 0); 109 *ipp = NULL; 110 xfs_ilock(dp, lock_mode); 111 error = XFS_ERROR(ENOENT); 112 } 113 } 114 return error; 115} 116 117/* 118 * Allocates a new inode from disk and return a pointer to the 119 * incore copy. This routine will internally commit the current 120 * transaction and allocate a new one if the Space Manager needed 121 * to do an allocation to replenish the inode free-list. 122 * 123 * This routine is designed to be called from xfs_create and 124 * xfs_create_dir. 125 * 126 */ 127int 128xfs_dir_ialloc( 129 xfs_trans_t **tpp, /* input: current transaction; 130 output: may be a new transaction. */ 131 xfs_inode_t *dp, /* directory within whose allocate 132 the inode. */ 133 mode_t mode, 134 xfs_nlink_t nlink, 135 xfs_dev_t rdev, 136 cred_t *credp, 137 prid_t prid, /* project id */ 138 int okalloc, /* ok to allocate new space */ 139 xfs_inode_t **ipp, /* pointer to inode; it will be 140 locked. */ 141 int *committed) 142 143{ 144 xfs_trans_t *tp; 145 xfs_trans_t *ntp; 146 xfs_inode_t *ip; 147 xfs_buf_t *ialloc_context = NULL; 148 boolean_t call_again = B_FALSE; 149 int code; 150 uint log_res; 151 uint log_count; 152 void *dqinfo; 153 uint tflags; 154 155 tp = *tpp; 156 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 157 158 /* 159 * xfs_ialloc will return a pointer to an incore inode if 160 * the Space Manager has an available inode on the free 161 * list. Otherwise, it will do an allocation and replenish 162 * the freelist. Since we can only do one allocation per 163 * transaction without deadlocks, we will need to commit the 164 * current transaction and start a new one. We will then 165 * need to call xfs_ialloc again to get the inode. 166 * 167 * If xfs_ialloc did an allocation to replenish the freelist, 168 * it returns the bp containing the head of the freelist as 169 * ialloc_context. We will hold a lock on it across the 170 * transaction commit so that no other process can steal 171 * the inode(s) that we've just allocated. 172 */ 173 code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, okalloc, 174 &ialloc_context, &call_again, &ip); 175 176 /* 177 * Return an error if we were unable to allocate a new inode. 178 * This should only happen if we run out of space on disk or 179 * encounter a disk error. 180 */ 181 if (code) { 182 *ipp = NULL; 183 return code; 184 } 185 if (!call_again && (ip == NULL)) { 186 *ipp = NULL; 187 return XFS_ERROR(ENOSPC); 188 } 189 190 /* 191 * If call_again is set, then we were unable to get an 192 * inode in one operation. We need to commit the current 193 * transaction and call xfs_ialloc() again. It is guaranteed 194 * to succeed the second time. 195 */ 196 if (call_again) { 197 198 /* 199 * Normally, xfs_trans_commit releases all the locks. 200 * We call bhold to hang on to the ialloc_context across 201 * the commit. Holding this buffer prevents any other 202 * processes from doing any allocations in this 203 * allocation group. 204 */ 205 xfs_trans_bhold(tp, ialloc_context); 206 /* 207 * Save the log reservation so we can use 208 * them in the next transaction. 209 */ 210 log_res = xfs_trans_get_log_res(tp); 211 log_count = xfs_trans_get_log_count(tp); 212 213 /* 214 * We want the quota changes to be associated with the next 215 * transaction, NOT this one. So, detach the dqinfo from this 216 * and attach it to the next transaction. 217 */ 218 dqinfo = NULL; 219 tflags = 0; 220 if (tp->t_dqinfo) { 221 dqinfo = (void *)tp->t_dqinfo; 222 tp->t_dqinfo = NULL; 223 tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY; 224 tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY); 225 } 226 227 ntp = xfs_trans_dup(tp); 228 code = xfs_trans_commit(tp, 0, NULL); 229 tp = ntp; 230 if (committed != NULL) { 231 *committed = 1; 232 } 233 /* 234 * If we get an error during the commit processing, 235 * release the buffer that is still held and return 236 * to the caller. 237 */ 238 if (code) { 239 xfs_buf_relse(ialloc_context); 240 if (dqinfo) { 241 tp->t_dqinfo = dqinfo; 242 XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp); 243 } 244 *tpp = ntp; 245 *ipp = NULL; 246 return code; 247 } 248 code = xfs_trans_reserve(tp, 0, log_res, 0, 249 XFS_TRANS_PERM_LOG_RES, log_count); 250 /* 251 * Re-attach the quota info that we detached from prev trx. 252 */ 253 if (dqinfo) { 254 tp->t_dqinfo = dqinfo; 255 tp->t_flags |= tflags; 256 } 257 258 if (code) { 259 xfs_buf_relse(ialloc_context); 260 *tpp = ntp; 261 *ipp = NULL; 262 return code; 263 } 264 xfs_trans_bjoin(tp, ialloc_context); 265 266 /* 267 * Call ialloc again. Since we've locked out all 268 * other allocations in this allocation group, 269 * this call should always succeed. 270 */ 271 code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, 272 okalloc, &ialloc_context, &call_again, &ip); 273 274 /* 275 * If we get an error at this point, return to the caller 276 * so that the current transaction can be aborted. 277 */ 278 if (code) { 279 *tpp = tp; 280 *ipp = NULL; 281 return code; 282 } 283 ASSERT ((!call_again) && (ip != NULL)); 284 285 } else { 286 if (committed != NULL) { 287 *committed = 0; 288 } 289 } 290 291 *ipp = ip; 292 *tpp = tp; 293 294 return 0; 295} 296 297/* 298 * Decrement the link count on an inode & log the change. 299 * If this causes the link count to go to zero, initiate the 300 * logging activity required to truncate a file. 301 */ 302int /* error */ 303xfs_droplink( 304 xfs_trans_t *tp, 305 xfs_inode_t *ip) 306{ 307 int error; 308 309 xfs_ichgtime(ip, XFS_ICHGTIME_CHG); 310 311 ASSERT (ip->i_d.di_nlink > 0); 312 ip->i_d.di_nlink--; 313 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 314 315 error = 0; 316 if (ip->i_d.di_nlink == 0) { 317 /* 318 * We're dropping the last link to this file. 319 * Move the on-disk inode to the AGI unlinked list. 320 * From xfs_inactive() we will pull the inode from 321 * the list and free it. 322 */ 323 error = xfs_iunlink(tp, ip); 324 } 325 return error; 326} 327 328/* 329 * This gets called when the inode's version needs to be changed from 1 to 2. 330 * Currently this happens when the nlink field overflows the old 16-bit value 331 * or when chproj is called to change the project for the first time. 332 * As a side effect the superblock version will also get rev'd 333 * to contain the NLINK bit. 334 */ 335void 336xfs_bump_ino_vers2( 337 xfs_trans_t *tp, 338 xfs_inode_t *ip) 339{ 340 xfs_mount_t *mp; 341 unsigned long s; 342 343 ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); 344 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1); 345 346 ip->i_d.di_version = XFS_DINODE_VERSION_2; 347 ip->i_d.di_onlink = 0; 348 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 349 mp = tp->t_mountp; 350 if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { 351 s = XFS_SB_LOCK(mp); 352 if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { 353 XFS_SB_VERSION_ADDNLINK(&mp->m_sb); 354 XFS_SB_UNLOCK(mp, s); 355 xfs_mod_sb(tp, XFS_SB_VERSIONNUM); 356 } else { 357 XFS_SB_UNLOCK(mp, s); 358 } 359 } 360 /* Caller must log the inode */ 361} 362 363/* 364 * Increment the link count on an inode & log the change. 365 */ 366int 367xfs_bumplink( 368 xfs_trans_t *tp, 369 xfs_inode_t *ip) 370{ 371 if (ip->i_d.di_nlink >= XFS_MAXLINK) 372 return XFS_ERROR(EMLINK); 373 xfs_ichgtime(ip, XFS_ICHGTIME_CHG); 374 375 ASSERT(ip->i_d.di_nlink > 0); 376 ip->i_d.di_nlink++; 377 if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) && 378 (ip->i_d.di_nlink > XFS_MAXLINK_1)) { 379 /* 380 * The inode has increased its number of links beyond 381 * what can fit in an old format inode. It now needs 382 * to be converted to a version 2 inode with a 32 bit 383 * link count. If this is the first inode in the file 384 * system to do this, then we need to bump the superblock 385 * version number as well. 386 */ 387 xfs_bump_ino_vers2(tp, ip); 388 } 389 390 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 391 return 0; 392} 393 394/* 395 * Try to truncate the given file to 0 length. Currently called 396 * only out of xfs_remove when it has to truncate a file to free 397 * up space for the remove to proceed. 398 */ 399int 400xfs_truncate_file( 401 xfs_mount_t *mp, 402 xfs_inode_t *ip) 403{ 404 xfs_trans_t *tp; 405 int error; 406 407#ifdef QUOTADEBUG 408 /* 409 * This is called to truncate the quotainodes too. 410 */ 411 if (XFS_IS_UQUOTA_ON(mp)) { 412 if (ip->i_ino != mp->m_sb.sb_uquotino) 413 ASSERT(ip->i_udquot); 414 } 415 if (XFS_IS_OQUOTA_ON(mp)) { 416 if (ip->i_ino != mp->m_sb.sb_gquotino) 417 ASSERT(ip->i_gdquot); 418 } 419#endif 420 /* 421 * Make the call to xfs_itruncate_start before starting the 422 * transaction, because we cannot make the call while we're 423 * in a transaction. 424 */ 425 xfs_ilock(ip, XFS_IOLOCK_EXCL); 426 xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, (xfs_fsize_t)0); 427 428 tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); 429 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 430 XFS_TRANS_PERM_LOG_RES, 431 XFS_ITRUNCATE_LOG_COUNT))) { 432 xfs_trans_cancel(tp, 0); 433 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 434 return error; 435 } 436 437 /* 438 * Follow the normal truncate locking protocol. Since we 439 * hold the inode in the transaction, we know that it's number 440 * of references will stay constant. 441 */ 442 xfs_ilock(ip, XFS_ILOCK_EXCL); 443 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 444 xfs_trans_ihold(tp, ip); 445 /* 446 * Signal a sync xaction. The only case where that isn't 447 * the case is if we're truncating an already unlinked file 448 * on a wsync fs. In that case, we know the blocks can't 449 * reappear in the file because the links to file are 450 * permanently toast. Currently, we're always going to 451 * want a sync transaction because this code is being 452 * called from places where nlink is guaranteed to be 1 453 * but I'm leaving the tests in to protect against future 454 * changes -- rcc. 455 */ 456 error = xfs_itruncate_finish(&tp, ip, (xfs_fsize_t)0, 457 XFS_DATA_FORK, 458 ((ip->i_d.di_nlink != 0 || 459 !(mp->m_flags & XFS_MOUNT_WSYNC)) 460 ? 1 : 0)); 461 if (error) { 462 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | 463 XFS_TRANS_ABORT); 464 } else { 465 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 466 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, 467 NULL); 468 } 469 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 470 471 return error; 472} 473