1153323Srodrigc/* 2159451Srodrigc * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3159451Srodrigc * All Rights Reserved. 4153323Srodrigc * 5159451Srodrigc * This program is free software; you can redistribute it and/or 6159451Srodrigc * modify it under the terms of the GNU General Public License as 7153323Srodrigc * published by the Free Software Foundation. 8153323Srodrigc * 9159451Srodrigc * This program is distributed in the hope that it would be useful, 10159451Srodrigc * but WITHOUT ANY WARRANTY; without even the implied warranty of 11159451Srodrigc * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12159451Srodrigc * GNU General Public License for more details. 13153323Srodrigc * 14159451Srodrigc * You should have received a copy of the GNU General Public License 15159451Srodrigc * along with this program; if not, write the Free Software Foundation, 16159451Srodrigc * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17153323Srodrigc */ 18153323Srodrigc#include "xfs.h" 19159451Srodrigc#include "xfs_fs.h" 20153323Srodrigc#include "xfs_types.h" 21159451Srodrigc#include "xfs_bit.h" 22159451Srodrigc#include "xfs_log.h" 23153323Srodrigc#include "xfs_inum.h" 24159451Srodrigc#include "xfs_imap.h" 25153323Srodrigc#include "xfs_trans.h" 26153323Srodrigc#include "xfs_trans_priv.h" 27153323Srodrigc#include "xfs_sb.h" 28153323Srodrigc#include "xfs_ag.h" 29153323Srodrigc#include "xfs_dir.h" 30153323Srodrigc#include "xfs_dir2.h" 31153323Srodrigc#include "xfs_dmapi.h" 32153323Srodrigc#include "xfs_mount.h" 33159451Srodrigc#include "xfs_bmap_btree.h" 34153323Srodrigc#include "xfs_alloc_btree.h" 35153323Srodrigc#include "xfs_ialloc_btree.h" 36153323Srodrigc#include "xfs_dir_sf.h" 37153323Srodrigc#include "xfs_dir2_sf.h" 38159451Srodrigc#include "xfs_attr_sf.h" 39153323Srodrigc#include "xfs_dinode.h" 40159451Srodrigc#include "xfs_inode.h" 41159451Srodrigc#include "xfs_buf_item.h" 42153323Srodrigc#include "xfs_inode_item.h" 43159451Srodrigc#include "xfs_btree.h" 44159451Srodrigc#include "xfs_alloc.h" 45159451Srodrigc#include "xfs_ialloc.h" 46153323Srodrigc#include "xfs_bmap.h" 47153323Srodrigc#include "xfs_rw.h" 48153323Srodrigc#include "xfs_error.h" 49153323Srodrigc#include "xfs_utils.h" 50153323Srodrigc#include "xfs_dir2_trace.h" 51153323Srodrigc#include "xfs_quota.h" 52153323Srodrigc#include "xfs_mac.h" 53153323Srodrigc#include "xfs_acl.h" 54153323Srodrigc 55153323Srodrigc 56153323Srodrigckmem_zone_t *xfs_ifork_zone; 57153323Srodrigckmem_zone_t *xfs_inode_zone; 58153323Srodrigckmem_zone_t *xfs_chashlist_zone; 59153323Srodrigc 60153323Srodrigc/* 61153323Srodrigc * Used in xfs_itruncate(). This is the maximum number of extents 62153323Srodrigc * freed from a file in a single transaction. 63153323Srodrigc */ 64153323Srodrigc#define XFS_ITRUNC_MAX_EXTENTS 2 65153323Srodrigc 66153323SrodrigcSTATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); 67153323SrodrigcSTATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); 68153323SrodrigcSTATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); 69153323SrodrigcSTATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); 70153323Srodrigc 71153323Srodrigc#ifdef DEBUG 72153323Srodrigc/* 73153323Srodrigc * Make sure that the extents in the given memory buffer 74153323Srodrigc * are valid. 75153323Srodrigc */ 76153323SrodrigcSTATIC void 77153323Srodrigcxfs_validate_extents( 78159451Srodrigc xfs_ifork_t *ifp, 79153323Srodrigc int nrecs, 80153323Srodrigc int disk, 81153323Srodrigc xfs_exntfmt_t fmt) 82153323Srodrigc{ 83159451Srodrigc xfs_bmbt_rec_t *ep; 84153323Srodrigc xfs_bmbt_irec_t irec; 85153323Srodrigc xfs_bmbt_rec_t rec; 86153323Srodrigc int i; 87153323Srodrigc 88153323Srodrigc for (i = 0; i < nrecs; i++) { 89159451Srodrigc ep = xfs_iext_get_ext(ifp, i); 90153323Srodrigc rec.l0 = get_unaligned((__uint64_t*)&ep->l0); 91153323Srodrigc rec.l1 = get_unaligned((__uint64_t*)&ep->l1); 92153323Srodrigc if (disk) 93153323Srodrigc xfs_bmbt_disk_get_all(&rec, &irec); 94153323Srodrigc else 95153323Srodrigc xfs_bmbt_get_all(&rec, &irec); 96153323Srodrigc if (fmt == XFS_EXTFMT_NOSTATE) 97153323Srodrigc ASSERT(irec.br_state == XFS_EXT_NORM); 98153323Srodrigc } 99153323Srodrigc} 100153323Srodrigc#else /* DEBUG */ 101159451Srodrigc#define xfs_validate_extents(ifp, nrecs, disk, fmt) 102153323Srodrigc#endif /* DEBUG */ 103153323Srodrigc 104153323Srodrigc/* 105153323Srodrigc * Check that none of the inode's in the buffer have a next 106153323Srodrigc * unlinked field of 0. 107153323Srodrigc */ 108153323Srodrigc#if defined(DEBUG) 109153323Srodrigcvoid 110153323Srodrigcxfs_inobp_check( 111153323Srodrigc xfs_mount_t *mp, 112153323Srodrigc xfs_buf_t *bp) 113153323Srodrigc{ 114153323Srodrigc int i; 115153323Srodrigc int j; 116153323Srodrigc xfs_dinode_t *dip; 117153323Srodrigc 118153323Srodrigc j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 119153323Srodrigc 120153323Srodrigc for (i = 0; i < j; i++) { 121153323Srodrigc dip = (xfs_dinode_t *)xfs_buf_offset(bp, 122153323Srodrigc i * mp->m_sb.sb_inodesize); 123159451Srodrigc if (!dip->di_next_unlinked) { 124153323Srodrigc xfs_fs_cmn_err(CE_ALERT, mp, 125153323Srodrigc "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. About to pop an ASSERT.", 126153323Srodrigc bp); 127159451Srodrigc ASSERT(dip->di_next_unlinked); 128153323Srodrigc } 129153323Srodrigc } 130153323Srodrigc} 131153323Srodrigc#endif 132153323Srodrigc 133153323Srodrigc/* 134153323Srodrigc * This routine is called to map an inode number within a file 135153323Srodrigc * system to the buffer containing the on-disk version of the 136153323Srodrigc * inode. It returns a pointer to the buffer containing the 137153323Srodrigc * on-disk inode in the bpp parameter, and in the dip parameter 138153323Srodrigc * it returns a pointer to the on-disk inode within that buffer. 139153323Srodrigc * 140153323Srodrigc * If a non-zero error is returned, then the contents of bpp and 141153323Srodrigc * dipp are undefined. 142153323Srodrigc * 143153323Srodrigc * Use xfs_imap() to determine the size and location of the 144153323Srodrigc * buffer to read from disk. 145153323Srodrigc */ 146159451SrodrigcSTATIC int 147153323Srodrigcxfs_inotobp( 148153323Srodrigc xfs_mount_t *mp, 149153323Srodrigc xfs_trans_t *tp, 150153323Srodrigc xfs_ino_t ino, 151153323Srodrigc xfs_dinode_t **dipp, 152153323Srodrigc xfs_buf_t **bpp, 153153323Srodrigc int *offset) 154153323Srodrigc{ 155153323Srodrigc int di_ok; 156153323Srodrigc xfs_imap_t imap; 157153323Srodrigc xfs_buf_t *bp; 158153323Srodrigc int error; 159153323Srodrigc xfs_dinode_t *dip; 160153323Srodrigc 161153323Srodrigc /* 162159451Srodrigc * Call the space management code to find the location of the 163153323Srodrigc * inode on disk. 164153323Srodrigc */ 165153323Srodrigc imap.im_blkno = 0; 166153323Srodrigc error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); 167153323Srodrigc if (error != 0) { 168153323Srodrigc cmn_err(CE_WARN, 169153323Srodrigc "xfs_inotobp: xfs_imap() returned an " 170153323Srodrigc "error %d on %s. Returning error.", error, mp->m_fsname); 171153323Srodrigc return error; 172153323Srodrigc } 173153323Srodrigc 174153323Srodrigc /* 175153323Srodrigc * If the inode number maps to a block outside the bounds of the 176153323Srodrigc * file system then return NULL rather than calling read_buf 177153323Srodrigc * and panicing when we get an error from the driver. 178153323Srodrigc */ 179153323Srodrigc if ((imap.im_blkno + imap.im_len) > 180153323Srodrigc XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 181153323Srodrigc cmn_err(CE_WARN, 182159451Srodrigc "xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds " 183153323Srodrigc "of the file system %s. Returning EINVAL.", 184159451Srodrigc (unsigned long long)imap.im_blkno, 185159451Srodrigc imap.im_len, mp->m_fsname); 186153323Srodrigc return XFS_ERROR(EINVAL); 187153323Srodrigc } 188153323Srodrigc 189153323Srodrigc /* 190153323Srodrigc * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will 191153323Srodrigc * default to just a read_buf() call. 192153323Srodrigc */ 193153323Srodrigc error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, 194153323Srodrigc (int)imap.im_len, XFS_BUF_LOCK, &bp); 195153323Srodrigc 196153323Srodrigc if (error) { 197153323Srodrigc cmn_err(CE_WARN, 198153323Srodrigc "xfs_inotobp: xfs_trans_read_buf() returned an " 199153323Srodrigc "error %d on %s. Returning error.", error, mp->m_fsname); 200153323Srodrigc return error; 201153323Srodrigc } 202153323Srodrigc dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0); 203153323Srodrigc di_ok = 204153323Srodrigc INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC && 205153323Srodrigc XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT)); 206153323Srodrigc if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, 207153323Srodrigc XFS_RANDOM_ITOBP_INOTOBP))) { 208153323Srodrigc XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip); 209153323Srodrigc xfs_trans_brelse(tp, bp); 210153323Srodrigc cmn_err(CE_WARN, 211153323Srodrigc "xfs_inotobp: XFS_TEST_ERROR() returned an " 212153323Srodrigc "error on %s. Returning EFSCORRUPTED.", mp->m_fsname); 213153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 214153323Srodrigc } 215153323Srodrigc 216153323Srodrigc xfs_inobp_check(mp, bp); 217153323Srodrigc 218153323Srodrigc /* 219153323Srodrigc * Set *dipp to point to the on-disk inode in the buffer. 220153323Srodrigc */ 221153323Srodrigc *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 222153323Srodrigc *bpp = bp; 223153323Srodrigc *offset = imap.im_boffset; 224153323Srodrigc return 0; 225153323Srodrigc} 226153323Srodrigc 227153323Srodrigc 228153323Srodrigc/* 229153323Srodrigc * This routine is called to map an inode to the buffer containing 230153323Srodrigc * the on-disk version of the inode. It returns a pointer to the 231153323Srodrigc * buffer containing the on-disk inode in the bpp parameter, and in 232153323Srodrigc * the dip parameter it returns a pointer to the on-disk inode within 233153323Srodrigc * that buffer. 234153323Srodrigc * 235153323Srodrigc * If a non-zero error is returned, then the contents of bpp and 236153323Srodrigc * dipp are undefined. 237153323Srodrigc * 238153323Srodrigc * If the inode is new and has not yet been initialized, use xfs_imap() 239153323Srodrigc * to determine the size and location of the buffer to read from disk. 240153323Srodrigc * If the inode has already been mapped to its buffer and read in once, 241153323Srodrigc * then use the mapping information stored in the inode rather than 242153323Srodrigc * calling xfs_imap(). This allows us to avoid the overhead of looking 243153323Srodrigc * at the inode btree for small block file systems (see xfs_dilocate()). 244153323Srodrigc * We can tell whether the inode has been mapped in before by comparing 245153323Srodrigc * its disk block address to 0. Only uninitialized inodes will have 246153323Srodrigc * 0 for the disk block address. 247153323Srodrigc */ 248153323Srodrigcint 249153323Srodrigcxfs_itobp( 250153323Srodrigc xfs_mount_t *mp, 251153323Srodrigc xfs_trans_t *tp, 252153323Srodrigc xfs_inode_t *ip, 253153323Srodrigc xfs_dinode_t **dipp, 254153323Srodrigc xfs_buf_t **bpp, 255159451Srodrigc xfs_daddr_t bno, 256159451Srodrigc uint imap_flags) 257153323Srodrigc{ 258153323Srodrigc xfs_buf_t *bp; 259153323Srodrigc int error; 260153323Srodrigc xfs_imap_t imap; 261153323Srodrigc#ifdef __KERNEL__ 262153323Srodrigc int i; 263153323Srodrigc int ni; 264153323Srodrigc#endif 265153323Srodrigc 266153323Srodrigc if (ip->i_blkno == (xfs_daddr_t)0) { 267153323Srodrigc /* 268153323Srodrigc * Call the space management code to find the location of the 269153323Srodrigc * inode on disk. 270153323Srodrigc */ 271153323Srodrigc imap.im_blkno = bno; 272159451Srodrigc if ((error = xfs_imap(mp, tp, ip->i_ino, &imap, 273159451Srodrigc XFS_IMAP_LOOKUP | imap_flags))) 274153323Srodrigc return error; 275153323Srodrigc 276153323Srodrigc /* 277153323Srodrigc * If the inode number maps to a block outside the bounds 278153323Srodrigc * of the file system then return NULL rather than calling 279153323Srodrigc * read_buf and panicing when we get an error from the 280153323Srodrigc * driver. 281153323Srodrigc */ 282153323Srodrigc if ((imap.im_blkno + imap.im_len) > 283153323Srodrigc XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 284153323Srodrigc#ifdef DEBUG 285153323Srodrigc xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " 286153323Srodrigc "(imap.im_blkno (0x%llx) " 287153323Srodrigc "+ imap.im_len (0x%llx)) > " 288153323Srodrigc " XFS_FSB_TO_BB(mp, " 289153323Srodrigc "mp->m_sb.sb_dblocks) (0x%llx)", 290153323Srodrigc (unsigned long long) imap.im_blkno, 291153323Srodrigc (unsigned long long) imap.im_len, 292153323Srodrigc XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 293153323Srodrigc#endif /* DEBUG */ 294153323Srodrigc return XFS_ERROR(EINVAL); 295153323Srodrigc } 296153323Srodrigc 297153323Srodrigc /* 298153323Srodrigc * Fill in the fields in the inode that will be used to 299153323Srodrigc * map the inode to its buffer from now on. 300153323Srodrigc */ 301153323Srodrigc ip->i_blkno = imap.im_blkno; 302153323Srodrigc ip->i_len = imap.im_len; 303153323Srodrigc ip->i_boffset = imap.im_boffset; 304153323Srodrigc } else { 305153323Srodrigc /* 306153323Srodrigc * We've already mapped the inode once, so just use the 307153323Srodrigc * mapping that we saved the first time. 308153323Srodrigc */ 309153323Srodrigc imap.im_blkno = ip->i_blkno; 310153323Srodrigc imap.im_len = ip->i_len; 311153323Srodrigc imap.im_boffset = ip->i_boffset; 312153323Srodrigc } 313153323Srodrigc ASSERT(bno == 0 || bno == imap.im_blkno); 314153323Srodrigc 315153323Srodrigc /* 316153323Srodrigc * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will 317153323Srodrigc * default to just a read_buf() call. 318153323Srodrigc */ 319153323Srodrigc error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, 320153323Srodrigc (int)imap.im_len, XFS_BUF_LOCK, &bp); 321153323Srodrigc 322153323Srodrigc if (error) { 323153323Srodrigc#ifdef DEBUG 324153323Srodrigc xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " 325153323Srodrigc "xfs_trans_read_buf() returned error %d, " 326153323Srodrigc "imap.im_blkno 0x%llx, imap.im_len 0x%llx", 327153323Srodrigc error, (unsigned long long) imap.im_blkno, 328153323Srodrigc (unsigned long long) imap.im_len); 329153323Srodrigc#endif /* DEBUG */ 330153323Srodrigc return error; 331153323Srodrigc } 332153323Srodrigc#ifdef __KERNEL__ 333153323Srodrigc /* 334153323Srodrigc * Validate the magic number and version of every inode in the buffer 335153323Srodrigc * (if DEBUG kernel) or the first inode in the buffer, otherwise. 336153323Srodrigc */ 337153323Srodrigc#ifdef DEBUG 338159451Srodrigc ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : 339159451Srodrigc (BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog); 340153323Srodrigc#else 341159451Srodrigc ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : 1; 342153323Srodrigc#endif 343153323Srodrigc for (i = 0; i < ni; i++) { 344153323Srodrigc int di_ok; 345153323Srodrigc xfs_dinode_t *dip; 346153323Srodrigc 347153323Srodrigc dip = (xfs_dinode_t *)xfs_buf_offset(bp, 348153323Srodrigc (i << mp->m_sb.sb_inodelog)); 349153323Srodrigc di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC && 350153323Srodrigc XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT)); 351153323Srodrigc if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, 352153323Srodrigc XFS_RANDOM_ITOBP_INOTOBP))) { 353153323Srodrigc#ifdef DEBUG 354153323Srodrigc prdev("bad inode magic/vsn daddr %lld #%d (magic=%x)", 355153323Srodrigc mp->m_ddev_targp, 356153323Srodrigc (unsigned long long)imap.im_blkno, i, 357153323Srodrigc INT_GET(dip->di_core.di_magic, ARCH_CONVERT)); 358153323Srodrigc#endif 359153323Srodrigc XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH, 360153323Srodrigc mp, dip); 361153323Srodrigc xfs_trans_brelse(tp, bp); 362153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 363153323Srodrigc } 364153323Srodrigc } 365153323Srodrigc#endif /* __KERNEL__ */ 366153323Srodrigc 367153323Srodrigc xfs_inobp_check(mp, bp); 368153323Srodrigc 369153323Srodrigc /* 370153323Srodrigc * Mark the buffer as an inode buffer now that it looks good 371153323Srodrigc */ 372153323Srodrigc XFS_BUF_SET_VTYPE(bp, B_FS_INO); 373153323Srodrigc 374153323Srodrigc /* 375153323Srodrigc * Set *dipp to point to the on-disk inode in the buffer. 376153323Srodrigc */ 377153323Srodrigc *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 378153323Srodrigc *bpp = bp; 379153323Srodrigc return 0; 380153323Srodrigc} 381153323Srodrigc 382153323Srodrigc/* 383153323Srodrigc * Move inode type and inode format specific information from the 384153323Srodrigc * on-disk inode to the in-core inode. For fifos, devs, and sockets 385153323Srodrigc * this means set if_rdev to the proper value. For files, directories, 386153323Srodrigc * and symlinks this means to bring in the in-line data or extent 387153323Srodrigc * pointers. For a file in B-tree format, only the root is immediately 388153323Srodrigc * brought in-core. The rest will be in-lined in if_extents when it 389153323Srodrigc * is first referenced (see xfs_iread_extents()). 390153323Srodrigc */ 391153323SrodrigcSTATIC int 392153323Srodrigcxfs_iformat( 393153323Srodrigc xfs_inode_t *ip, 394153323Srodrigc xfs_dinode_t *dip) 395153323Srodrigc{ 396153323Srodrigc xfs_attr_shortform_t *atp; 397153323Srodrigc int size; 398153323Srodrigc int error; 399153323Srodrigc xfs_fsize_t di_size; 400153323Srodrigc ip->i_df.if_ext_max = 401153323Srodrigc XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 402153323Srodrigc error = 0; 403153323Srodrigc 404153323Srodrigc if (unlikely( 405153323Srodrigc INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) + 406153323Srodrigc INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) > 407153323Srodrigc INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT))) { 408159451Srodrigc xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 409159451Srodrigc "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", 410153323Srodrigc (unsigned long long)ip->i_ino, 411153323Srodrigc (int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) 412153323Srodrigc + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)), 413153323Srodrigc (unsigned long long) 414153323Srodrigc INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT)); 415153323Srodrigc XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, 416153323Srodrigc ip->i_mount, dip); 417153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 418153323Srodrigc } 419153323Srodrigc 420153323Srodrigc if (unlikely(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize)) { 421159451Srodrigc xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 422159451Srodrigc "corrupt dinode %Lu, forkoff = 0x%x.", 423153323Srodrigc (unsigned long long)ip->i_ino, 424153323Srodrigc (int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT))); 425153323Srodrigc XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 426153323Srodrigc ip->i_mount, dip); 427153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 428153323Srodrigc } 429153323Srodrigc 430153323Srodrigc switch (ip->i_d.di_mode & S_IFMT) { 431153323Srodrigc case S_IFIFO: 432153323Srodrigc case S_IFCHR: 433153323Srodrigc case S_IFBLK: 434153323Srodrigc case S_IFSOCK: 435153323Srodrigc if (unlikely(INT_GET(dip->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_DEV)) { 436153323Srodrigc XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, 437153323Srodrigc ip->i_mount, dip); 438153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 439153323Srodrigc } 440153323Srodrigc ip->i_d.di_size = 0; 441153323Srodrigc ip->i_df.if_u2.if_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT); 442153323Srodrigc break; 443153323Srodrigc 444153323Srodrigc case S_IFREG: 445153323Srodrigc case S_IFLNK: 446153323Srodrigc case S_IFDIR: 447153323Srodrigc switch (INT_GET(dip->di_core.di_format, ARCH_CONVERT)) { 448153323Srodrigc case XFS_DINODE_FMT_LOCAL: 449153323Srodrigc /* 450153323Srodrigc * no local regular files yet 451153323Srodrigc */ 452153323Srodrigc if (unlikely((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & S_IFMT) == S_IFREG)) { 453159451Srodrigc xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 454159451Srodrigc "corrupt inode %Lu " 455159451Srodrigc "(local format for regular file).", 456153323Srodrigc (unsigned long long) ip->i_ino); 457153323Srodrigc XFS_CORRUPTION_ERROR("xfs_iformat(4)", 458153323Srodrigc XFS_ERRLEVEL_LOW, 459153323Srodrigc ip->i_mount, dip); 460153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 461153323Srodrigc } 462153323Srodrigc 463153323Srodrigc di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT); 464159451Srodrigc if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 465159451Srodrigc xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 466159451Srodrigc "corrupt inode %Lu " 467159451Srodrigc "(bad size %Ld for local inode).", 468153323Srodrigc (unsigned long long) ip->i_ino, 469153323Srodrigc (long long) di_size); 470153323Srodrigc XFS_CORRUPTION_ERROR("xfs_iformat(5)", 471153323Srodrigc XFS_ERRLEVEL_LOW, 472153323Srodrigc ip->i_mount, dip); 473153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 474153323Srodrigc } 475153323Srodrigc 476153323Srodrigc size = (int)di_size; 477153323Srodrigc error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); 478153323Srodrigc break; 479153323Srodrigc case XFS_DINODE_FMT_EXTENTS: 480153323Srodrigc error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); 481153323Srodrigc break; 482153323Srodrigc case XFS_DINODE_FMT_BTREE: 483153323Srodrigc error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); 484153323Srodrigc break; 485153323Srodrigc default: 486153323Srodrigc XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, 487153323Srodrigc ip->i_mount); 488153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 489153323Srodrigc } 490153323Srodrigc break; 491153323Srodrigc 492153323Srodrigc default: 493153323Srodrigc XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); 494153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 495153323Srodrigc } 496153323Srodrigc if (error) { 497153323Srodrigc return error; 498153323Srodrigc } 499159451Srodrigc if (!XFS_DFORK_Q(dip)) 500153323Srodrigc return 0; 501153323Srodrigc ASSERT(ip->i_afp == NULL); 502153323Srodrigc ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); 503153323Srodrigc ip->i_afp->if_ext_max = 504153323Srodrigc XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 505153323Srodrigc switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) { 506153323Srodrigc case XFS_DINODE_FMT_LOCAL: 507159451Srodrigc atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); 508159451Srodrigc size = be16_to_cpu(atp->hdr.totsize); 509153323Srodrigc error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); 510153323Srodrigc break; 511153323Srodrigc case XFS_DINODE_FMT_EXTENTS: 512153323Srodrigc error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); 513153323Srodrigc break; 514153323Srodrigc case XFS_DINODE_FMT_BTREE: 515153323Srodrigc error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); 516153323Srodrigc break; 517153323Srodrigc default: 518153323Srodrigc error = XFS_ERROR(EFSCORRUPTED); 519153323Srodrigc break; 520153323Srodrigc } 521153323Srodrigc if (error) { 522153323Srodrigc kmem_zone_free(xfs_ifork_zone, ip->i_afp); 523153323Srodrigc ip->i_afp = NULL; 524153323Srodrigc xfs_idestroy_fork(ip, XFS_DATA_FORK); 525153323Srodrigc } 526153323Srodrigc return error; 527153323Srodrigc} 528153323Srodrigc 529153323Srodrigc/* 530153323Srodrigc * The file is in-lined in the on-disk inode. 531153323Srodrigc * If it fits into if_inline_data, then copy 532153323Srodrigc * it there, otherwise allocate a buffer for it 533153323Srodrigc * and copy the data there. Either way, set 534153323Srodrigc * if_data to point at the data. 535153323Srodrigc * If we allocate a buffer for the data, make 536153323Srodrigc * sure that its size is a multiple of 4 and 537153323Srodrigc * record the real size in i_real_bytes. 538153323Srodrigc */ 539153323SrodrigcSTATIC int 540153323Srodrigcxfs_iformat_local( 541153323Srodrigc xfs_inode_t *ip, 542153323Srodrigc xfs_dinode_t *dip, 543153323Srodrigc int whichfork, 544153323Srodrigc int size) 545153323Srodrigc{ 546153323Srodrigc xfs_ifork_t *ifp; 547153323Srodrigc int real_size; 548153323Srodrigc 549153323Srodrigc /* 550153323Srodrigc * If the size is unreasonable, then something 551153323Srodrigc * is wrong and we just bail out rather than crash in 552153323Srodrigc * kmem_alloc() or memcpy() below. 553153323Srodrigc */ 554159451Srodrigc if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 555159451Srodrigc xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 556159451Srodrigc "corrupt inode %Lu " 557159451Srodrigc "(bad size %d for local fork, size = %d).", 558153323Srodrigc (unsigned long long) ip->i_ino, size, 559159451Srodrigc XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); 560153323Srodrigc XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, 561153323Srodrigc ip->i_mount, dip); 562153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 563153323Srodrigc } 564153323Srodrigc ifp = XFS_IFORK_PTR(ip, whichfork); 565153323Srodrigc real_size = 0; 566153323Srodrigc if (size == 0) 567153323Srodrigc ifp->if_u1.if_data = NULL; 568153323Srodrigc else if (size <= sizeof(ifp->if_u2.if_inline_data)) 569153323Srodrigc ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 570153323Srodrigc else { 571153323Srodrigc real_size = roundup(size, 4); 572153323Srodrigc ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 573153323Srodrigc } 574153323Srodrigc ifp->if_bytes = size; 575153323Srodrigc ifp->if_real_bytes = real_size; 576153323Srodrigc if (size) 577159451Srodrigc memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); 578153323Srodrigc ifp->if_flags &= ~XFS_IFEXTENTS; 579153323Srodrigc ifp->if_flags |= XFS_IFINLINE; 580153323Srodrigc return 0; 581153323Srodrigc} 582153323Srodrigc 583153323Srodrigc/* 584153323Srodrigc * The file consists of a set of extents all 585153323Srodrigc * of which fit into the on-disk inode. 586153323Srodrigc * If there are few enough extents to fit into 587153323Srodrigc * the if_inline_ext, then copy them there. 588153323Srodrigc * Otherwise allocate a buffer for them and copy 589153323Srodrigc * them into it. Either way, set if_extents 590153323Srodrigc * to point at the extents. 591153323Srodrigc */ 592153323SrodrigcSTATIC int 593153323Srodrigcxfs_iformat_extents( 594153323Srodrigc xfs_inode_t *ip, 595153323Srodrigc xfs_dinode_t *dip, 596153323Srodrigc int whichfork) 597153323Srodrigc{ 598153323Srodrigc xfs_bmbt_rec_t *ep, *dp; 599153323Srodrigc xfs_ifork_t *ifp; 600153323Srodrigc int nex; 601153323Srodrigc int size; 602153323Srodrigc int i; 603153323Srodrigc 604153323Srodrigc ifp = XFS_IFORK_PTR(ip, whichfork); 605159451Srodrigc nex = XFS_DFORK_NEXTENTS(dip, whichfork); 606153323Srodrigc size = nex * (uint)sizeof(xfs_bmbt_rec_t); 607153323Srodrigc 608153323Srodrigc /* 609153323Srodrigc * If the number of extents is unreasonable, then something 610153323Srodrigc * is wrong and we just bail out rather than crash in 611153323Srodrigc * kmem_alloc() or memcpy() below. 612153323Srodrigc */ 613159451Srodrigc if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 614159451Srodrigc xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 615159451Srodrigc "corrupt inode %Lu ((a)extents = %d).", 616153323Srodrigc (unsigned long long) ip->i_ino, nex); 617153323Srodrigc XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, 618153323Srodrigc ip->i_mount, dip); 619153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 620153323Srodrigc } 621153323Srodrigc 622159451Srodrigc ifp->if_real_bytes = 0; 623153323Srodrigc if (nex == 0) 624153323Srodrigc ifp->if_u1.if_extents = NULL; 625153323Srodrigc else if (nex <= XFS_INLINE_EXTS) 626153323Srodrigc ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 627159451Srodrigc else 628159451Srodrigc xfs_iext_add(ifp, 0, nex); 629159451Srodrigc 630153323Srodrigc ifp->if_bytes = size; 631153323Srodrigc if (size) { 632159451Srodrigc dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); 633159451Srodrigc xfs_validate_extents(ifp, nex, 1, XFS_EXTFMT_INODE(ip)); 634159451Srodrigc for (i = 0; i < nex; i++, dp++) { 635159451Srodrigc ep = xfs_iext_get_ext(ifp, i); 636153323Srodrigc ep->l0 = INT_GET(get_unaligned((__uint64_t*)&dp->l0), 637153323Srodrigc ARCH_CONVERT); 638153323Srodrigc ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1), 639153323Srodrigc ARCH_CONVERT); 640153323Srodrigc } 641153323Srodrigc xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex, 642153323Srodrigc whichfork); 643153323Srodrigc if (whichfork != XFS_DATA_FORK || 644153323Srodrigc XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) 645153323Srodrigc if (unlikely(xfs_check_nostate_extents( 646159451Srodrigc ifp, 0, nex))) { 647153323Srodrigc XFS_ERROR_REPORT("xfs_iformat_extents(2)", 648153323Srodrigc XFS_ERRLEVEL_LOW, 649153323Srodrigc ip->i_mount); 650153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 651153323Srodrigc } 652153323Srodrigc } 653153323Srodrigc ifp->if_flags |= XFS_IFEXTENTS; 654153323Srodrigc return 0; 655153323Srodrigc} 656153323Srodrigc 657153323Srodrigc/* 658153323Srodrigc * The file has too many extents to fit into 659153323Srodrigc * the inode, so they are in B-tree format. 660153323Srodrigc * Allocate a buffer for the root of the B-tree 661153323Srodrigc * and copy the root into it. The i_extents 662153323Srodrigc * field will remain NULL until all of the 663153323Srodrigc * extents are read in (when they are needed). 664153323Srodrigc */ 665153323SrodrigcSTATIC int 666153323Srodrigcxfs_iformat_btree( 667153323Srodrigc xfs_inode_t *ip, 668153323Srodrigc xfs_dinode_t *dip, 669153323Srodrigc int whichfork) 670153323Srodrigc{ 671153323Srodrigc xfs_bmdr_block_t *dfp; 672153323Srodrigc xfs_ifork_t *ifp; 673153323Srodrigc /* REFERENCED */ 674153323Srodrigc int nrecs; 675153323Srodrigc int size; 676153323Srodrigc 677153323Srodrigc ifp = XFS_IFORK_PTR(ip, whichfork); 678159451Srodrigc dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); 679153323Srodrigc size = XFS_BMAP_BROOT_SPACE(dfp); 680153323Srodrigc nrecs = XFS_BMAP_BROOT_NUMRECS(dfp); 681153323Srodrigc 682153323Srodrigc /* 683153323Srodrigc * blow out if -- fork has less extents than can fit in 684153323Srodrigc * fork (fork shouldn't be a btree format), root btree 685153323Srodrigc * block has more records than can fit into the fork, 686153323Srodrigc * or the number of extents is greater than the number of 687153323Srodrigc * blocks. 688153323Srodrigc */ 689153323Srodrigc if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max 690153323Srodrigc || XFS_BMDR_SPACE_CALC(nrecs) > 691159451Srodrigc XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) 692153323Srodrigc || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { 693159451Srodrigc xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 694159451Srodrigc "corrupt inode %Lu (btree).", 695153323Srodrigc (unsigned long long) ip->i_ino); 696153323Srodrigc XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, 697153323Srodrigc ip->i_mount); 698153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 699153323Srodrigc } 700153323Srodrigc 701153323Srodrigc ifp->if_broot_bytes = size; 702153323Srodrigc ifp->if_broot = kmem_alloc(size, KM_SLEEP); 703153323Srodrigc ASSERT(ifp->if_broot != NULL); 704153323Srodrigc /* 705153323Srodrigc * Copy and convert from the on-disk structure 706153323Srodrigc * to the in-memory structure. 707153323Srodrigc */ 708159451Srodrigc xfs_bmdr_to_bmbt(dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), 709153323Srodrigc ifp->if_broot, size); 710153323Srodrigc ifp->if_flags &= ~XFS_IFEXTENTS; 711153323Srodrigc ifp->if_flags |= XFS_IFBROOT; 712153323Srodrigc 713153323Srodrigc return 0; 714153323Srodrigc} 715153323Srodrigc 716153323Srodrigc/* 717153323Srodrigc * xfs_xlate_dinode_core - translate an xfs_inode_core_t between ondisk 718153323Srodrigc * and native format 719153323Srodrigc * 720153323Srodrigc * buf = on-disk representation 721153323Srodrigc * dip = native representation 722153323Srodrigc * dir = direction - +ve -> disk to native 723153323Srodrigc * -ve -> native to disk 724153323Srodrigc */ 725153323Srodrigcvoid 726153323Srodrigcxfs_xlate_dinode_core( 727153323Srodrigc xfs_caddr_t buf, 728153323Srodrigc xfs_dinode_core_t *dip, 729159451Srodrigc int dir) 730153323Srodrigc{ 731153323Srodrigc xfs_dinode_core_t *buf_core = (xfs_dinode_core_t *)buf; 732153323Srodrigc xfs_dinode_core_t *mem_core = (xfs_dinode_core_t *)dip; 733159451Srodrigc xfs_arch_t arch = ARCH_CONVERT; 734153323Srodrigc 735153323Srodrigc ASSERT(dir); 736153323Srodrigc 737153323Srodrigc INT_XLATE(buf_core->di_magic, mem_core->di_magic, dir, arch); 738153323Srodrigc INT_XLATE(buf_core->di_mode, mem_core->di_mode, dir, arch); 739153323Srodrigc INT_XLATE(buf_core->di_version, mem_core->di_version, dir, arch); 740153323Srodrigc INT_XLATE(buf_core->di_format, mem_core->di_format, dir, arch); 741153323Srodrigc INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch); 742153323Srodrigc INT_XLATE(buf_core->di_uid, mem_core->di_uid, dir, arch); 743153323Srodrigc INT_XLATE(buf_core->di_gid, mem_core->di_gid, dir, arch); 744153323Srodrigc INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch); 745153323Srodrigc INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch); 746153323Srodrigc 747153323Srodrigc if (dir > 0) { 748153323Srodrigc memcpy(mem_core->di_pad, buf_core->di_pad, 749153323Srodrigc sizeof(buf_core->di_pad)); 750153323Srodrigc } else { 751153323Srodrigc memcpy(buf_core->di_pad, mem_core->di_pad, 752153323Srodrigc sizeof(buf_core->di_pad)); 753153323Srodrigc } 754153323Srodrigc 755153323Srodrigc INT_XLATE(buf_core->di_flushiter, mem_core->di_flushiter, dir, arch); 756153323Srodrigc 757153323Srodrigc INT_XLATE(buf_core->di_atime.t_sec, mem_core->di_atime.t_sec, 758153323Srodrigc dir, arch); 759153323Srodrigc INT_XLATE(buf_core->di_atime.t_nsec, mem_core->di_atime.t_nsec, 760153323Srodrigc dir, arch); 761153323Srodrigc INT_XLATE(buf_core->di_mtime.t_sec, mem_core->di_mtime.t_sec, 762153323Srodrigc dir, arch); 763153323Srodrigc INT_XLATE(buf_core->di_mtime.t_nsec, mem_core->di_mtime.t_nsec, 764153323Srodrigc dir, arch); 765153323Srodrigc INT_XLATE(buf_core->di_ctime.t_sec, mem_core->di_ctime.t_sec, 766153323Srodrigc dir, arch); 767153323Srodrigc INT_XLATE(buf_core->di_ctime.t_nsec, mem_core->di_ctime.t_nsec, 768153323Srodrigc dir, arch); 769153323Srodrigc INT_XLATE(buf_core->di_size, mem_core->di_size, dir, arch); 770153323Srodrigc INT_XLATE(buf_core->di_nblocks, mem_core->di_nblocks, dir, arch); 771153323Srodrigc INT_XLATE(buf_core->di_extsize, mem_core->di_extsize, dir, arch); 772153323Srodrigc INT_XLATE(buf_core->di_nextents, mem_core->di_nextents, dir, arch); 773153323Srodrigc INT_XLATE(buf_core->di_anextents, mem_core->di_anextents, dir, arch); 774153323Srodrigc INT_XLATE(buf_core->di_forkoff, mem_core->di_forkoff, dir, arch); 775153323Srodrigc INT_XLATE(buf_core->di_aformat, mem_core->di_aformat, dir, arch); 776153323Srodrigc INT_XLATE(buf_core->di_dmevmask, mem_core->di_dmevmask, dir, arch); 777153323Srodrigc INT_XLATE(buf_core->di_dmstate, mem_core->di_dmstate, dir, arch); 778153323Srodrigc INT_XLATE(buf_core->di_flags, mem_core->di_flags, dir, arch); 779153323Srodrigc INT_XLATE(buf_core->di_gen, mem_core->di_gen, dir, arch); 780153323Srodrigc} 781153323Srodrigc 782159451SrodrigcSTATIC uint 783159451Srodrigc_xfs_dic2xflags( 784159451Srodrigc xfs_dinode_core_t *dic, 785159451Srodrigc __uint16_t di_flags) 786159451Srodrigc{ 787159451Srodrigc uint flags = 0; 788159451Srodrigc 789159451Srodrigc if (di_flags & XFS_DIFLAG_ANY) { 790159451Srodrigc if (di_flags & XFS_DIFLAG_REALTIME) 791159451Srodrigc flags |= XFS_XFLAG_REALTIME; 792159451Srodrigc if (di_flags & XFS_DIFLAG_PREALLOC) 793159451Srodrigc flags |= XFS_XFLAG_PREALLOC; 794159451Srodrigc if (di_flags & XFS_DIFLAG_IMMUTABLE) 795159451Srodrigc flags |= XFS_XFLAG_IMMUTABLE; 796159451Srodrigc if (di_flags & XFS_DIFLAG_APPEND) 797159451Srodrigc flags |= XFS_XFLAG_APPEND; 798159451Srodrigc if (di_flags & XFS_DIFLAG_SYNC) 799159451Srodrigc flags |= XFS_XFLAG_SYNC; 800159451Srodrigc if (di_flags & XFS_DIFLAG_NOATIME) 801159451Srodrigc flags |= XFS_XFLAG_NOATIME; 802159451Srodrigc if (di_flags & XFS_DIFLAG_NODUMP) 803159451Srodrigc flags |= XFS_XFLAG_NODUMP; 804159451Srodrigc if (di_flags & XFS_DIFLAG_RTINHERIT) 805159451Srodrigc flags |= XFS_XFLAG_RTINHERIT; 806159451Srodrigc if (di_flags & XFS_DIFLAG_PROJINHERIT) 807159451Srodrigc flags |= XFS_XFLAG_PROJINHERIT; 808159451Srodrigc if (di_flags & XFS_DIFLAG_NOSYMLINKS) 809159451Srodrigc flags |= XFS_XFLAG_NOSYMLINKS; 810159451Srodrigc if (di_flags & XFS_DIFLAG_EXTSIZE) 811159451Srodrigc flags |= XFS_XFLAG_EXTSIZE; 812159451Srodrigc if (di_flags & XFS_DIFLAG_EXTSZINHERIT) 813159451Srodrigc flags |= XFS_XFLAG_EXTSZINHERIT; 814159451Srodrigc } 815159451Srodrigc 816159451Srodrigc return flags; 817159451Srodrigc} 818159451Srodrigc 819159451Srodrigcuint 820159451Srodrigcxfs_ip2xflags( 821159451Srodrigc xfs_inode_t *ip) 822159451Srodrigc{ 823159451Srodrigc xfs_dinode_core_t *dic = &ip->i_d; 824159451Srodrigc 825159451Srodrigc return _xfs_dic2xflags(dic, dic->di_flags) | 826159451Srodrigc (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0); 827159451Srodrigc} 828159451Srodrigc 829159451Srodrigcuint 830159451Srodrigcxfs_dic2xflags( 831159451Srodrigc xfs_dinode_core_t *dic) 832159451Srodrigc{ 833159451Srodrigc return _xfs_dic2xflags(dic, INT_GET(dic->di_flags, ARCH_CONVERT)) | 834159451Srodrigc (XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0); 835159451Srodrigc} 836159451Srodrigc 837153323Srodrigc/* 838153323Srodrigc * Given a mount structure and an inode number, return a pointer 839159451Srodrigc * to a newly allocated in-core inode corresponding to the given 840153323Srodrigc * inode number. 841153323Srodrigc * 842153323Srodrigc * Initialize the inode's attributes and extent pointers if it 843153323Srodrigc * already has them (it will not if the inode has no links). 844153323Srodrigc */ 845153323Srodrigcint 846153323Srodrigcxfs_iread( 847153323Srodrigc xfs_mount_t *mp, 848153323Srodrigc xfs_trans_t *tp, 849153323Srodrigc xfs_ino_t ino, 850153323Srodrigc xfs_inode_t **ipp, 851153323Srodrigc xfs_daddr_t bno) 852153323Srodrigc{ 853153323Srodrigc xfs_buf_t *bp; 854153323Srodrigc xfs_dinode_t *dip; 855153323Srodrigc xfs_inode_t *ip; 856153323Srodrigc int error; 857153323Srodrigc 858153323Srodrigc ASSERT(xfs_inode_zone != NULL); 859153323Srodrigc 860153323Srodrigc ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP); 861153323Srodrigc ip->i_ino = ino; 862153323Srodrigc ip->i_mount = mp; 863153323Srodrigc 864153323Srodrigc /* 865153323Srodrigc * Get pointer's to the on-disk inode and the buffer containing it. 866153323Srodrigc * If the inode number refers to a block outside the file system 867153323Srodrigc * then xfs_itobp() will return NULL. In this case we should 868153323Srodrigc * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will 869153323Srodrigc * know that this is a new incore inode. 870153323Srodrigc */ 871159451Srodrigc error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, 0); 872159451Srodrigc if (error) { 873153323Srodrigc kmem_zone_free(xfs_inode_zone, ip); 874153323Srodrigc return error; 875153323Srodrigc } 876153323Srodrigc 877153323Srodrigc /* 878153323Srodrigc * Initialize inode's trace buffers. 879153323Srodrigc * Do this before xfs_iformat in case it adds entries. 880153323Srodrigc */ 881153323Srodrigc#ifdef XFS_BMAP_TRACE 882153323Srodrigc ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP); 883153323Srodrigc#endif 884153323Srodrigc#ifdef XFS_BMBT_TRACE 885153323Srodrigc ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP); 886153323Srodrigc#endif 887153323Srodrigc#ifdef XFS_RW_TRACE 888153323Srodrigc ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP); 889153323Srodrigc#endif 890153323Srodrigc#ifdef XFS_ILOCK_TRACE 891153323Srodrigc ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP); 892153323Srodrigc#endif 893153323Srodrigc#ifdef XFS_DIR2_TRACE 894153323Srodrigc ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP); 895153323Srodrigc#endif 896153323Srodrigc 897153323Srodrigc /* 898153323Srodrigc * If we got something that isn't an inode it means someone 899153323Srodrigc * (nfs or dmi) has a stale handle. 900153323Srodrigc */ 901153323Srodrigc if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) { 902153323Srodrigc kmem_zone_free(xfs_inode_zone, ip); 903153323Srodrigc xfs_trans_brelse(tp, bp); 904153323Srodrigc#ifdef DEBUG 905153323Srodrigc xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 906153323Srodrigc "dip->di_core.di_magic (0x%x) != " 907153323Srodrigc "XFS_DINODE_MAGIC (0x%x)", 908153323Srodrigc INT_GET(dip->di_core.di_magic, ARCH_CONVERT), 909153323Srodrigc XFS_DINODE_MAGIC); 910153323Srodrigc#endif /* DEBUG */ 911153323Srodrigc return XFS_ERROR(EINVAL); 912153323Srodrigc } 913153323Srodrigc 914153323Srodrigc /* 915153323Srodrigc * If the on-disk inode is already linked to a directory 916153323Srodrigc * entry, copy all of the inode into the in-core inode. 917153323Srodrigc * xfs_iformat() handles copying in the inode format 918153323Srodrigc * specific information. 919153323Srodrigc * Otherwise, just get the truly permanent information. 920153323Srodrigc */ 921159451Srodrigc if (dip->di_core.di_mode) { 922153323Srodrigc xfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core, 923159451Srodrigc &(ip->i_d), 1); 924153323Srodrigc error = xfs_iformat(ip, dip); 925153323Srodrigc if (error) { 926153323Srodrigc kmem_zone_free(xfs_inode_zone, ip); 927153323Srodrigc xfs_trans_brelse(tp, bp); 928153323Srodrigc#ifdef DEBUG 929153323Srodrigc xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 930153323Srodrigc "xfs_iformat() returned error %d", 931153323Srodrigc error); 932153323Srodrigc#endif /* DEBUG */ 933153323Srodrigc return error; 934153323Srodrigc } 935153323Srodrigc } else { 936153323Srodrigc ip->i_d.di_magic = INT_GET(dip->di_core.di_magic, ARCH_CONVERT); 937153323Srodrigc ip->i_d.di_version = INT_GET(dip->di_core.di_version, ARCH_CONVERT); 938153323Srodrigc ip->i_d.di_gen = INT_GET(dip->di_core.di_gen, ARCH_CONVERT); 939153323Srodrigc ip->i_d.di_flushiter = INT_GET(dip->di_core.di_flushiter, ARCH_CONVERT); 940153323Srodrigc /* 941153323Srodrigc * Make sure to pull in the mode here as well in 942153323Srodrigc * case the inode is released without being used. 943153323Srodrigc * This ensures that xfs_inactive() will see that 944153323Srodrigc * the inode is already free and not try to mess 945153323Srodrigc * with the uninitialized part of it. 946153323Srodrigc */ 947153323Srodrigc ip->i_d.di_mode = 0; 948153323Srodrigc /* 949153323Srodrigc * Initialize the per-fork minima and maxima for a new 950153323Srodrigc * inode here. xfs_iformat will do it for old inodes. 951153323Srodrigc */ 952153323Srodrigc ip->i_df.if_ext_max = 953153323Srodrigc XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 954153323Srodrigc } 955153323Srodrigc 956159451Srodrigc#ifdef RMC 957159451Srodrigc INIT_LIST_HEAD(&ip->i_reclaim); 958159451Srodrigc#else 959159451Srodrigc bzero(&ip->i_reclaim,sizeof(ip->i_reclaim)); 960159451Srodrigc#endif 961153323Srodrigc 962159451Srodrigc 963153323Srodrigc /* 964153323Srodrigc * The inode format changed when we moved the link count and 965153323Srodrigc * made it 32 bits long. If this is an old format inode, 966153323Srodrigc * convert it in memory to look like a new one. If it gets 967153323Srodrigc * flushed to disk we will convert back before flushing or 968153323Srodrigc * logging it. We zero out the new projid field and the old link 969153323Srodrigc * count field. We'll handle clearing the pad field (the remains 970153323Srodrigc * of the old uuid field) when we actually convert the inode to 971153323Srodrigc * the new format. We don't change the version number so that we 972153323Srodrigc * can distinguish this from a real new format inode. 973153323Srodrigc */ 974153323Srodrigc if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { 975153323Srodrigc ip->i_d.di_nlink = ip->i_d.di_onlink; 976153323Srodrigc ip->i_d.di_onlink = 0; 977153323Srodrigc ip->i_d.di_projid = 0; 978153323Srodrigc } 979153323Srodrigc 980153323Srodrigc ip->i_delayed_blks = 0; 981153323Srodrigc 982153323Srodrigc /* 983153323Srodrigc * Mark the buffer containing the inode as something to keep 984153323Srodrigc * around for a while. This helps to keep recently accessed 985153323Srodrigc * meta-data in-core longer. 986153323Srodrigc */ 987153323Srodrigc XFS_BUF_SET_REF(bp, XFS_INO_REF); 988153323Srodrigc 989153323Srodrigc /* 990153323Srodrigc * Use xfs_trans_brelse() to release the buffer containing the 991153323Srodrigc * on-disk inode, because it was acquired with xfs_trans_read_buf() 992153323Srodrigc * in xfs_itobp() above. If tp is NULL, this is just a normal 993153323Srodrigc * brelse(). If we're within a transaction, then xfs_trans_brelse() 994153323Srodrigc * will only release the buffer if it is not dirty within the 995153323Srodrigc * transaction. It will be OK to release the buffer in this case, 996153323Srodrigc * because inodes on disk are never destroyed and we will be 997153323Srodrigc * locking the new in-core inode before putting it in the hash 998153323Srodrigc * table where other processes can find it. Thus we don't have 999153323Srodrigc * to worry about the inode being changed just because we released 1000153323Srodrigc * the buffer. 1001153323Srodrigc */ 1002153323Srodrigc xfs_trans_brelse(tp, bp); 1003153323Srodrigc *ipp = ip; 1004153323Srodrigc return 0; 1005153323Srodrigc} 1006153323Srodrigc 1007153323Srodrigc/* 1008153323Srodrigc * Read in extents from a btree-format inode. 1009153323Srodrigc * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. 1010153323Srodrigc */ 1011153323Srodrigcint 1012153323Srodrigcxfs_iread_extents( 1013153323Srodrigc xfs_trans_t *tp, 1014153323Srodrigc xfs_inode_t *ip, 1015153323Srodrigc int whichfork) 1016153323Srodrigc{ 1017153323Srodrigc int error; 1018153323Srodrigc xfs_ifork_t *ifp; 1019159451Srodrigc xfs_extnum_t nextents; 1020153323Srodrigc size_t size; 1021153323Srodrigc 1022153323Srodrigc if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 1023153323Srodrigc XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, 1024153323Srodrigc ip->i_mount); 1025153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 1026153323Srodrigc } 1027159451Srodrigc nextents = XFS_IFORK_NEXTENTS(ip, whichfork); 1028159451Srodrigc size = nextents * sizeof(xfs_bmbt_rec_t); 1029153323Srodrigc ifp = XFS_IFORK_PTR(ip, whichfork); 1030159451Srodrigc 1031153323Srodrigc /* 1032153323Srodrigc * We know that the size is valid (it's checked in iformat_btree) 1033153323Srodrigc */ 1034153323Srodrigc ifp->if_lastex = NULLEXTNUM; 1035159451Srodrigc ifp->if_bytes = ifp->if_real_bytes = 0; 1036153323Srodrigc ifp->if_flags |= XFS_IFEXTENTS; 1037159451Srodrigc xfs_iext_add(ifp, 0, nextents); 1038153323Srodrigc error = xfs_bmap_read_extents(tp, ip, whichfork); 1039153323Srodrigc if (error) { 1040159451Srodrigc xfs_iext_destroy(ifp); 1041153323Srodrigc ifp->if_flags &= ~XFS_IFEXTENTS; 1042153323Srodrigc return error; 1043153323Srodrigc } 1044159451Srodrigc xfs_validate_extents(ifp, nextents, 0, XFS_EXTFMT_INODE(ip)); 1045153323Srodrigc return 0; 1046153323Srodrigc} 1047153323Srodrigc 1048153323Srodrigc/* 1049153323Srodrigc * Allocate an inode on disk and return a copy of its in-core version. 1050153323Srodrigc * The in-core inode is locked exclusively. Set mode, nlink, and rdev 1051153323Srodrigc * appropriately within the inode. The uid and gid for the inode are 1052153323Srodrigc * set according to the contents of the given cred structure. 1053153323Srodrigc * 1054153323Srodrigc * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() 1055153323Srodrigc * has a free inode available, call xfs_iget() 1056153323Srodrigc * to obtain the in-core version of the allocated inode. Finally, 1057153323Srodrigc * fill in the inode and log its initial contents. In this case, 1058153323Srodrigc * ialloc_context would be set to NULL and call_again set to false. 1059153323Srodrigc * 1060153323Srodrigc * If xfs_dialloc() does not have an available inode, 1061153323Srodrigc * it will replenish its supply by doing an allocation. Since we can 1062153323Srodrigc * only do one allocation within a transaction without deadlocks, we 1063153323Srodrigc * must commit the current transaction before returning the inode itself. 1064153323Srodrigc * In this case, therefore, we will set call_again to true and return. 1065153323Srodrigc * The caller should then commit the current transaction, start a new 1066153323Srodrigc * transaction, and call xfs_ialloc() again to actually get the inode. 1067153323Srodrigc * 1068153323Srodrigc * To ensure that some other process does not grab the inode that 1069153323Srodrigc * was allocated during the first call to xfs_ialloc(), this routine 1070153323Srodrigc * also returns the [locked] bp pointing to the head of the freelist 1071153323Srodrigc * as ialloc_context. The caller should hold this buffer across 1072153323Srodrigc * the commit and pass it back into this routine on the second call. 1073153323Srodrigc */ 1074153323Srodrigcint 1075153323Srodrigcxfs_ialloc( 1076153323Srodrigc xfs_trans_t *tp, 1077153323Srodrigc xfs_inode_t *pip, 1078153323Srodrigc mode_t mode, 1079159451Srodrigc xfs_nlink_t nlink, 1080153323Srodrigc xfs_dev_t rdev, 1081153323Srodrigc cred_t *cr, 1082153323Srodrigc xfs_prid_t prid, 1083153323Srodrigc int okalloc, 1084153323Srodrigc xfs_buf_t **ialloc_context, 1085153323Srodrigc boolean_t *call_again, 1086153323Srodrigc xfs_inode_t **ipp) 1087153323Srodrigc{ 1088153323Srodrigc xfs_ino_t ino; 1089153323Srodrigc xfs_inode_t *ip; 1090153323Srodrigc xfs_vnode_t *vp; 1091153323Srodrigc uint flags; 1092153323Srodrigc int error; 1093153323Srodrigc 1094153323Srodrigc /* 1095153323Srodrigc * Call the space management code to pick 1096153323Srodrigc * the on-disk inode to be allocated. 1097153323Srodrigc */ 1098159451Srodrigc error = xfs_dialloc(tp, pip->i_ino, mode, okalloc, 1099153323Srodrigc ialloc_context, call_again, &ino); 1100153323Srodrigc if (error != 0) { 1101153323Srodrigc return error; 1102153323Srodrigc } 1103153323Srodrigc if (*call_again || ino == NULLFSINO) { 1104153323Srodrigc *ipp = NULL; 1105153323Srodrigc return 0; 1106153323Srodrigc } 1107153323Srodrigc ASSERT(*ialloc_context == NULL); 1108153323Srodrigc 1109153323Srodrigc /* 1110153323Srodrigc * Get the in-core inode with the lock held exclusively. 1111153323Srodrigc * This is because we're setting fields here we need 1112153323Srodrigc * to prevent others from looking at until we're done. 1113153323Srodrigc */ 1114159451Srodrigc error = xfs_trans_iget(tp->t_mountp, tp, ino, 1115159451Srodrigc IGET_CREATE, XFS_ILOCK_EXCL, &ip); 1116153323Srodrigc if (error != 0) { 1117153323Srodrigc return error; 1118153323Srodrigc } 1119153323Srodrigc ASSERT(ip != NULL); 1120159451Srodrigc 1121153323Srodrigc vp = XFS_ITOV(ip); 1122153323Srodrigc ip->i_d.di_mode = (__uint16_t)mode; 1123153323Srodrigc ip->i_d.di_onlink = 0; 1124153323Srodrigc ip->i_d.di_nlink = nlink; 1125153323Srodrigc ASSERT(ip->i_d.di_nlink == nlink); 1126153323Srodrigc ip->i_d.di_uid = curthread->td_ucred->cr_uid; 1127153323Srodrigc ip->i_d.di_gid = curthread->td_ucred->cr_groups[0]; 1128153323Srodrigc ip->i_d.di_projid = prid; 1129153323Srodrigc memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 1130153323Srodrigc 1131153323Srodrigc /* 1132153323Srodrigc * If the superblock version is up to where we support new format 1133153323Srodrigc * inodes and this is currently an old format inode, then change 1134153323Srodrigc * the inode version number now. This way we only do the conversion 1135153323Srodrigc * here rather than here and in the flush/logging code. 1136153323Srodrigc */ 1137153323Srodrigc if (XFS_SB_VERSION_HASNLINK(&tp->t_mountp->m_sb) && 1138153323Srodrigc ip->i_d.di_version == XFS_DINODE_VERSION_1) { 1139153323Srodrigc ip->i_d.di_version = XFS_DINODE_VERSION_2; 1140153323Srodrigc /* 1141153323Srodrigc * We've already zeroed the old link count, the projid field, 1142153323Srodrigc * and the pad field. 1143153323Srodrigc */ 1144153323Srodrigc } 1145153323Srodrigc 1146153323Srodrigc /* 1147153323Srodrigc * Project ids won't be stored on disk if we are using a version 1 inode. 1148153323Srodrigc */ 1149153323Srodrigc if ( (prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1)) 1150153323Srodrigc xfs_bump_ino_vers2(tp, ip); 1151153323Srodrigc 1152153323Srodrigc if (XFS_INHERIT_GID(pip, vp->v_vfsp)) { 1153153323Srodrigc ip->i_d.di_gid = pip->i_d.di_gid; 1154153323Srodrigc if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) { 1155153323Srodrigc ip->i_d.di_mode |= S_ISGID; 1156153323Srodrigc } 1157153323Srodrigc } 1158153323Srodrigc 1159153323Srodrigc /* 1160153323Srodrigc * If the group ID of the new file does not match the effective group 1161153323Srodrigc * ID or one of the supplementary group IDs, the S_ISGID bit is cleared 1162153323Srodrigc * (and only if the irix_sgid_inherit compatibility variable is set). 1163153323Srodrigc */ 1164153323Srodrigc if ((irix_sgid_inherit) && 1165153323Srodrigc (ip->i_d.di_mode & S_ISGID) && 1166153323Srodrigc (!groupmember((gid_t)ip->i_d.di_gid, curthread->td_ucred))) { 1167153323Srodrigc ip->i_d.di_mode &= ~S_ISGID; 1168153323Srodrigc } 1169153323Srodrigc 1170153323Srodrigc ip->i_d.di_size = 0; 1171153323Srodrigc ip->i_d.di_nextents = 0; 1172153323Srodrigc ASSERT(ip->i_d.di_nblocks == 0); 1173153323Srodrigc xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD); 1174153323Srodrigc /* 1175153323Srodrigc * di_gen will have been taken care of in xfs_iread. 1176153323Srodrigc */ 1177153323Srodrigc ip->i_d.di_extsize = 0; 1178153323Srodrigc ip->i_d.di_dmevmask = 0; 1179153323Srodrigc ip->i_d.di_dmstate = 0; 1180153323Srodrigc ip->i_d.di_flags = 0; 1181153323Srodrigc flags = XFS_ILOG_CORE; 1182153323Srodrigc switch (mode & S_IFMT) { 1183153323Srodrigc case S_IFIFO: 1184153323Srodrigc case S_IFCHR: 1185153323Srodrigc case S_IFBLK: 1186153323Srodrigc case S_IFSOCK: 1187153323Srodrigc ip->i_d.di_format = XFS_DINODE_FMT_DEV; 1188153323Srodrigc ip->i_df.if_u2.if_rdev = rdev; 1189153323Srodrigc ip->i_df.if_flags = 0; 1190153323Srodrigc flags |= XFS_ILOG_DEV; 1191153323Srodrigc break; 1192153323Srodrigc case S_IFREG: 1193153323Srodrigc case S_IFDIR: 1194159451Srodrigc if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 1195159451Srodrigc uint di_flags = 0; 1196159451Srodrigc 1197159451Srodrigc if ((mode & S_IFMT) == S_IFDIR) { 1198159451Srodrigc if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 1199159451Srodrigc di_flags |= XFS_DIFLAG_RTINHERIT; 1200159451Srodrigc if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 1201159451Srodrigc di_flags |= XFS_DIFLAG_EXTSZINHERIT; 1202159451Srodrigc ip->i_d.di_extsize = pip->i_d.di_extsize; 1203159451Srodrigc } 1204159451Srodrigc } else if ((mode & S_IFMT) == S_IFREG) { 1205159451Srodrigc if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) { 1206159451Srodrigc di_flags |= XFS_DIFLAG_REALTIME; 1207159451Srodrigc ip->i_iocore.io_flags |= XFS_IOCORE_RT; 1208159451Srodrigc } 1209159451Srodrigc if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 1210159451Srodrigc di_flags |= XFS_DIFLAG_EXTSIZE; 1211159451Srodrigc ip->i_d.di_extsize = pip->i_d.di_extsize; 1212159451Srodrigc } 1213159451Srodrigc } 1214153323Srodrigc if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && 1215153323Srodrigc xfs_inherit_noatime) 1216159451Srodrigc di_flags |= XFS_DIFLAG_NOATIME; 1217153323Srodrigc if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) && 1218153323Srodrigc xfs_inherit_nodump) 1219159451Srodrigc di_flags |= XFS_DIFLAG_NODUMP; 1220153323Srodrigc if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) && 1221153323Srodrigc xfs_inherit_sync) 1222159451Srodrigc di_flags |= XFS_DIFLAG_SYNC; 1223159451Srodrigc if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && 1224159451Srodrigc xfs_inherit_nosymlinks) 1225159451Srodrigc di_flags |= XFS_DIFLAG_NOSYMLINKS; 1226159451Srodrigc if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1227159451Srodrigc di_flags |= XFS_DIFLAG_PROJINHERIT; 1228159451Srodrigc ip->i_d.di_flags |= di_flags; 1229153323Srodrigc } 1230159451Srodrigc /* FALLTHROUGH */ 1231153323Srodrigc case S_IFLNK: 1232153323Srodrigc ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 1233153323Srodrigc ip->i_df.if_flags = XFS_IFEXTENTS; 1234153323Srodrigc ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; 1235153323Srodrigc ip->i_df.if_u1.if_extents = NULL; 1236153323Srodrigc break; 1237153323Srodrigc default: 1238153323Srodrigc ASSERT(0); 1239153323Srodrigc } 1240153323Srodrigc /* 1241153323Srodrigc * Attribute fork settings for new inode. 1242153323Srodrigc */ 1243153323Srodrigc ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 1244153323Srodrigc ip->i_d.di_anextents = 0; 1245153323Srodrigc 1246153323Srodrigc /* 1247153323Srodrigc * Log the new values stuffed into the inode. 1248153323Srodrigc */ 1249153323Srodrigc xfs_trans_log_inode(tp, ip, flags); 1250153323Srodrigc 1251159451Srodrigc /* now that we have an i_mode we can set Linux inode ops (& unlock) */ 1252153323Srodrigc XVFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); 1253153323Srodrigc 1254153323Srodrigc *ipp = ip; 1255153323Srodrigc return 0; 1256153323Srodrigc} 1257153323Srodrigc 1258153323Srodrigc/* 1259153323Srodrigc * Check to make sure that there are no blocks allocated to the 1260153323Srodrigc * file beyond the size of the file. We don't check this for 1261153323Srodrigc * files with fixed size extents or real time extents, but we 1262153323Srodrigc * at least do it for regular files. 1263153323Srodrigc */ 1264153323Srodrigc#ifdef DEBUG 1265153323Srodrigcvoid 1266153323Srodrigcxfs_isize_check( 1267153323Srodrigc xfs_mount_t *mp, 1268153323Srodrigc xfs_inode_t *ip, 1269153323Srodrigc xfs_fsize_t isize) 1270153323Srodrigc{ 1271153323Srodrigc xfs_fileoff_t map_first; 1272153323Srodrigc int nimaps; 1273153323Srodrigc xfs_bmbt_irec_t imaps[2]; 1274153323Srodrigc 1275153323Srodrigc if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) 1276153323Srodrigc return; 1277153323Srodrigc 1278159451Srodrigc if (ip->i_d.di_flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_EXTSIZE)) 1279153323Srodrigc return; 1280153323Srodrigc 1281153323Srodrigc nimaps = 2; 1282153323Srodrigc map_first = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); 1283153323Srodrigc /* 1284153323Srodrigc * The filesystem could be shutting down, so bmapi may return 1285153323Srodrigc * an error. 1286153323Srodrigc */ 1287153323Srodrigc if (xfs_bmapi(NULL, ip, map_first, 1288153323Srodrigc (XFS_B_TO_FSB(mp, 1289153323Srodrigc (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - 1290153323Srodrigc map_first), 1291153323Srodrigc XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, 1292159451Srodrigc NULL, NULL)) 1293153323Srodrigc return; 1294153323Srodrigc ASSERT(nimaps == 1); 1295153323Srodrigc ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); 1296153323Srodrigc} 1297153323Srodrigc#endif /* DEBUG */ 1298153323Srodrigc 1299153323Srodrigc/* 1300153323Srodrigc * Calculate the last possible buffered byte in a file. This must 1301153323Srodrigc * include data that was buffered beyond the EOF by the write code. 1302153323Srodrigc * This also needs to deal with overflowing the xfs_fsize_t type 1303153323Srodrigc * which can happen for sizes near the limit. 1304153323Srodrigc * 1305153323Srodrigc * We also need to take into account any blocks beyond the EOF. It 1306153323Srodrigc * may be the case that they were buffered by a write which failed. 1307153323Srodrigc * In that case the pages will still be in memory, but the inode size 1308153323Srodrigc * will never have been updated. 1309153323Srodrigc */ 1310153323Srodrigcxfs_fsize_t 1311153323Srodrigcxfs_file_last_byte( 1312153323Srodrigc xfs_inode_t *ip) 1313153323Srodrigc{ 1314153323Srodrigc xfs_mount_t *mp; 1315153323Srodrigc xfs_fsize_t last_byte; 1316153323Srodrigc xfs_fileoff_t last_block; 1317153323Srodrigc xfs_fileoff_t size_last_block; 1318153323Srodrigc int error; 1319153323Srodrigc 1320153323Srodrigc ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE | MR_ACCESS)); 1321153323Srodrigc 1322153323Srodrigc mp = ip->i_mount; 1323153323Srodrigc /* 1324153323Srodrigc * Only check for blocks beyond the EOF if the extents have 1325153323Srodrigc * been read in. This eliminates the need for the inode lock, 1326153323Srodrigc * and it also saves us from looking when it really isn't 1327153323Srodrigc * necessary. 1328153323Srodrigc */ 1329153323Srodrigc if (ip->i_df.if_flags & XFS_IFEXTENTS) { 1330153323Srodrigc error = xfs_bmap_last_offset(NULL, ip, &last_block, 1331153323Srodrigc XFS_DATA_FORK); 1332153323Srodrigc if (error) { 1333153323Srodrigc last_block = 0; 1334153323Srodrigc } 1335153323Srodrigc } else { 1336153323Srodrigc last_block = 0; 1337153323Srodrigc } 1338153323Srodrigc size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_d.di_size); 1339153323Srodrigc last_block = XFS_FILEOFF_MAX(last_block, size_last_block); 1340153323Srodrigc 1341153323Srodrigc last_byte = XFS_FSB_TO_B(mp, last_block); 1342153323Srodrigc if (last_byte < 0) { 1343153323Srodrigc return XFS_MAXIOFFSET(mp); 1344153323Srodrigc } 1345153323Srodrigc last_byte += (1 << mp->m_writeio_log); 1346153323Srodrigc if (last_byte < 0) { 1347153323Srodrigc return XFS_MAXIOFFSET(mp); 1348153323Srodrigc } 1349153323Srodrigc return last_byte; 1350153323Srodrigc} 1351153323Srodrigc 1352153323Srodrigc#if defined(XFS_RW_TRACE) 1353153323SrodrigcSTATIC void 1354153323Srodrigcxfs_itrunc_trace( 1355153323Srodrigc int tag, 1356153323Srodrigc xfs_inode_t *ip, 1357153323Srodrigc int flag, 1358153323Srodrigc xfs_fsize_t new_size, 1359153323Srodrigc xfs_off_t toss_start, 1360153323Srodrigc xfs_off_t toss_finish) 1361153323Srodrigc{ 1362153323Srodrigc if (ip->i_rwtrace == NULL) { 1363153323Srodrigc return; 1364153323Srodrigc } 1365153323Srodrigc 1366153323Srodrigc ktrace_enter(ip->i_rwtrace, 1367153323Srodrigc (void*)((long)tag), 1368153323Srodrigc (void*)ip, 1369153323Srodrigc (void*)(unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff), 1370153323Srodrigc (void*)(unsigned long)(ip->i_d.di_size & 0xffffffff), 1371153323Srodrigc (void*)((long)flag), 1372153323Srodrigc (void*)(unsigned long)((new_size >> 32) & 0xffffffff), 1373153323Srodrigc (void*)(unsigned long)(new_size & 0xffffffff), 1374153323Srodrigc (void*)(unsigned long)((toss_start >> 32) & 0xffffffff), 1375153323Srodrigc (void*)(unsigned long)(toss_start & 0xffffffff), 1376153323Srodrigc (void*)(unsigned long)((toss_finish >> 32) & 0xffffffff), 1377153323Srodrigc (void*)(unsigned long)(toss_finish & 0xffffffff), 1378153323Srodrigc (void*)(unsigned long)current_cpu(), 1379159451Srodrigc (void*)(unsigned long)current_pid(), 1380159451Srodrigc (void*)NULL, 1381159451Srodrigc (void*)NULL, 1382159451Srodrigc (void*)NULL); 1383153323Srodrigc} 1384153323Srodrigc#else 1385153323Srodrigc#define xfs_itrunc_trace(tag, ip, flag, new_size, toss_start, toss_finish) 1386153323Srodrigc#endif 1387153323Srodrigc 1388153323Srodrigc/* 1389153323Srodrigc * Start the truncation of the file to new_size. The new size 1390153323Srodrigc * must be smaller than the current size. This routine will 1391153323Srodrigc * clear the buffer and page caches of file data in the removed 1392153323Srodrigc * range, and xfs_itruncate_finish() will remove the underlying 1393153323Srodrigc * disk blocks. 1394153323Srodrigc * 1395153323Srodrigc * The inode must have its I/O lock locked EXCLUSIVELY, and it 1396153323Srodrigc * must NOT have the inode lock held at all. This is because we're 1397153323Srodrigc * calling into the buffer/page cache code and we can't hold the 1398153323Srodrigc * inode lock when we do so. 1399153323Srodrigc * 1400159451Srodrigc * We need to wait for any direct I/Os in flight to complete before we 1401159451Srodrigc * proceed with the truncate. This is needed to prevent the extents 1402159451Srodrigc * being read or written by the direct I/Os from being removed while the 1403159451Srodrigc * I/O is in flight as there is no other method of synchronising 1404159451Srodrigc * direct I/O with the truncate operation. Also, because we hold 1405159451Srodrigc * the IOLOCK in exclusive mode, we prevent new direct I/Os from being 1406159451Srodrigc * started until the truncate completes and drops the lock. Essentially, 1407159451Srodrigc * the vn_iowait() call forms an I/O barrier that provides strict ordering 1408159451Srodrigc * between direct I/Os and the truncate operation. 1409159451Srodrigc * 1410153323Srodrigc * The flags parameter can have either the value XFS_ITRUNC_DEFINITE 1411153323Srodrigc * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used 1412153323Srodrigc * in the case that the caller is locking things out of order and 1413153323Srodrigc * may not be able to call xfs_itruncate_finish() with the inode lock 1414153323Srodrigc * held without dropping the I/O lock. If the caller must drop the 1415153323Srodrigc * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start() 1416153323Srodrigc * must be called again with all the same restrictions as the initial 1417153323Srodrigc * call. 1418153323Srodrigc */ 1419153323Srodrigcvoid 1420153323Srodrigcxfs_itruncate_start( 1421153323Srodrigc xfs_inode_t *ip, 1422153323Srodrigc uint flags, 1423153323Srodrigc xfs_fsize_t new_size) 1424153323Srodrigc{ 1425153323Srodrigc xfs_fsize_t last_byte; 1426153323Srodrigc xfs_off_t toss_start; 1427153323Srodrigc xfs_mount_t *mp; 1428153323Srodrigc xfs_vnode_t *vp; 1429153323Srodrigc 1430153323Srodrigc ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1431153323Srodrigc ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); 1432153323Srodrigc ASSERT((flags == XFS_ITRUNC_DEFINITE) || 1433153323Srodrigc (flags == XFS_ITRUNC_MAYBE)); 1434153323Srodrigc 1435153323Srodrigc mp = ip->i_mount; 1436153323Srodrigc vp = XFS_ITOV(ip); 1437159451Srodrigc 1438159451Srodrigc vn_iowait(vp); /* wait for the completion of any pending DIOs */ 1439159451Srodrigc 1440153323Srodrigc /* 1441153323Srodrigc * Call VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES() to get rid of pages and buffers 1442153323Srodrigc * overlapping the region being removed. We have to use 1443153323Srodrigc * the less efficient VOP_FLUSHINVAL_PAGES() in the case that the 1444153323Srodrigc * caller may not be able to finish the truncate without 1445153323Srodrigc * dropping the inode's I/O lock. Make sure 1446153323Srodrigc * to catch any pages brought in by buffers overlapping 1447153323Srodrigc * the EOF by searching out beyond the isize by our 1448153323Srodrigc * block size. We round new_size up to a block boundary 1449153323Srodrigc * so that we don't toss things on the same block as 1450153323Srodrigc * new_size but before it. 1451153323Srodrigc * 1452153323Srodrigc * Before calling VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES(), make sure to 1453153323Srodrigc * call remapf() over the same region if the file is mapped. 1454153323Srodrigc * This frees up mapped file references to the pages in the 1455153323Srodrigc * given range and for the VOP_FLUSHINVAL_PAGES() case it ensures 1456153323Srodrigc * that we get the latest mapped changes flushed out. 1457153323Srodrigc */ 1458153323Srodrigc toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 1459153323Srodrigc toss_start = XFS_FSB_TO_B(mp, toss_start); 1460153323Srodrigc if (toss_start < 0) { 1461153323Srodrigc /* 1462153323Srodrigc * The place to start tossing is beyond our maximum 1463153323Srodrigc * file size, so there is no way that the data extended 1464153323Srodrigc * out there. 1465153323Srodrigc */ 1466153323Srodrigc return; 1467153323Srodrigc } 1468153323Srodrigc last_byte = xfs_file_last_byte(ip); 1469153323Srodrigc xfs_itrunc_trace(XFS_ITRUNC_START, ip, flags, new_size, toss_start, 1470153323Srodrigc last_byte); 1471153323Srodrigc if (last_byte > toss_start) { 1472153323Srodrigc if (flags & XFS_ITRUNC_DEFINITE) { 1473153323Srodrigc XVOP_TOSS_PAGES(vp, toss_start, -1, FI_REMAPF_LOCKED); 1474153323Srodrigc } else { 1475153323Srodrigc XVOP_FLUSHINVAL_PAGES(vp, toss_start, -1, FI_REMAPF_LOCKED); 1476153323Srodrigc } 1477153323Srodrigc } 1478153323Srodrigc 1479153323Srodrigc#ifdef DEBUG 1480153323Srodrigc if (new_size == 0) { 1481153323Srodrigc ASSERT(VN_CACHED(vp) == 0); 1482153323Srodrigc } 1483153323Srodrigc#endif 1484153323Srodrigc} 1485153323Srodrigc 1486153323Srodrigc/* 1487153323Srodrigc * Shrink the file to the given new_size. The new 1488153323Srodrigc * size must be smaller than the current size. 1489153323Srodrigc * This will free up the underlying blocks 1490153323Srodrigc * in the removed range after a call to xfs_itruncate_start() 1491153323Srodrigc * or xfs_atruncate_start(). 1492153323Srodrigc * 1493153323Srodrigc * The transaction passed to this routine must have made 1494153323Srodrigc * a permanent log reservation of at least XFS_ITRUNCATE_LOG_RES. 1495153323Srodrigc * This routine may commit the given transaction and 1496153323Srodrigc * start new ones, so make sure everything involved in 1497153323Srodrigc * the transaction is tidy before calling here. 1498153323Srodrigc * Some transaction will be returned to the caller to be 1499153323Srodrigc * committed. The incoming transaction must already include 1500153323Srodrigc * the inode, and both inode locks must be held exclusively. 1501153323Srodrigc * The inode must also be "held" within the transaction. On 1502153323Srodrigc * return the inode will be "held" within the returned transaction. 1503153323Srodrigc * This routine does NOT require any disk space to be reserved 1504153323Srodrigc * for it within the transaction. 1505153323Srodrigc * 1506153323Srodrigc * The fork parameter must be either xfs_attr_fork or xfs_data_fork, 1507153323Srodrigc * and it indicates the fork which is to be truncated. For the 1508153323Srodrigc * attribute fork we only support truncation to size 0. 1509153323Srodrigc * 1510153323Srodrigc * We use the sync parameter to indicate whether or not the first 1511153323Srodrigc * transaction we perform might have to be synchronous. For the attr fork, 1512153323Srodrigc * it needs to be so if the unlink of the inode is not yet known to be 1513153323Srodrigc * permanent in the log. This keeps us from freeing and reusing the 1514153323Srodrigc * blocks of the attribute fork before the unlink of the inode becomes 1515153323Srodrigc * permanent. 1516153323Srodrigc * 1517153323Srodrigc * For the data fork, we normally have to run synchronously if we're 1518153323Srodrigc * being called out of the inactive path or we're being called 1519153323Srodrigc * out of the create path where we're truncating an existing file. 1520153323Srodrigc * Either way, the truncate needs to be sync so blocks don't reappear 1521153323Srodrigc * in the file with altered data in case of a crash. wsync filesystems 1522153323Srodrigc * can run the first case async because anything that shrinks the inode 1523153323Srodrigc * has to run sync so by the time we're called here from inactive, the 1524153323Srodrigc * inode size is permanently set to 0. 1525153323Srodrigc * 1526153323Srodrigc * Calls from the truncate path always need to be sync unless we're 1527153323Srodrigc * in a wsync filesystem and the file has already been unlinked. 1528153323Srodrigc * 1529153323Srodrigc * The caller is responsible for correctly setting the sync parameter. 1530153323Srodrigc * It gets too hard for us to guess here which path we're being called 1531153323Srodrigc * out of just based on inode state. 1532153323Srodrigc */ 1533153323Srodrigcint 1534153323Srodrigcxfs_itruncate_finish( 1535153323Srodrigc xfs_trans_t **tp, 1536153323Srodrigc xfs_inode_t *ip, 1537153323Srodrigc xfs_fsize_t new_size, 1538153323Srodrigc int fork, 1539153323Srodrigc int sync) 1540153323Srodrigc{ 1541153323Srodrigc xfs_fsblock_t first_block; 1542153323Srodrigc xfs_fileoff_t first_unmap_block; 1543153323Srodrigc xfs_fileoff_t last_block; 1544153323Srodrigc xfs_filblks_t unmap_len=0; 1545153323Srodrigc xfs_mount_t *mp; 1546153323Srodrigc xfs_trans_t *ntp; 1547153323Srodrigc int done; 1548153323Srodrigc int committed; 1549153323Srodrigc xfs_bmap_free_t free_list; 1550153323Srodrigc int error; 1551153323Srodrigc 1552153323Srodrigc ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1553153323Srodrigc ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 1554153323Srodrigc ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); 1555153323Srodrigc ASSERT(*tp != NULL); 1556153323Srodrigc ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 1557153323Srodrigc ASSERT(ip->i_transp == *tp); 1558153323Srodrigc ASSERT(ip->i_itemp != NULL); 1559153323Srodrigc ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD); 1560153323Srodrigc 1561153323Srodrigc 1562153323Srodrigc ntp = *tp; 1563153323Srodrigc mp = (ntp)->t_mountp; 1564153323Srodrigc ASSERT(! XFS_NOT_DQATTACHED(mp, ip)); 1565153323Srodrigc 1566153323Srodrigc /* 1567153323Srodrigc * We only support truncating the entire attribute fork. 1568153323Srodrigc */ 1569153323Srodrigc if (fork == XFS_ATTR_FORK) { 1570153323Srodrigc new_size = 0LL; 1571153323Srodrigc } 1572153323Srodrigc first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 1573153323Srodrigc xfs_itrunc_trace(XFS_ITRUNC_FINISH1, ip, 0, new_size, 0, 0); 1574153323Srodrigc /* 1575153323Srodrigc * The first thing we do is set the size to new_size permanently 1576153323Srodrigc * on disk. This way we don't have to worry about anyone ever 1577153323Srodrigc * being able to look at the data being freed even in the face 1578153323Srodrigc * of a crash. What we're getting around here is the case where 1579153323Srodrigc * we free a block, it is allocated to another file, it is written 1580153323Srodrigc * to, and then we crash. If the new data gets written to the 1581153323Srodrigc * file but the log buffers containing the free and reallocation 1582153323Srodrigc * don't, then we'd end up with garbage in the blocks being freed. 1583153323Srodrigc * As long as we make the new_size permanent before actually 1584153323Srodrigc * freeing any blocks it doesn't matter if they get writtten to. 1585153323Srodrigc * 1586153323Srodrigc * The callers must signal into us whether or not the size 1587153323Srodrigc * setting here must be synchronous. There are a few cases 1588153323Srodrigc * where it doesn't have to be synchronous. Those cases 1589153323Srodrigc * occur if the file is unlinked and we know the unlink is 1590153323Srodrigc * permanent or if the blocks being truncated are guaranteed 1591153323Srodrigc * to be beyond the inode eof (regardless of the link count) 1592153323Srodrigc * and the eof value is permanent. Both of these cases occur 1593153323Srodrigc * only on wsync-mounted filesystems. In those cases, we're 1594153323Srodrigc * guaranteed that no user will ever see the data in the blocks 1595153323Srodrigc * that are being truncated so the truncate can run async. 1596153323Srodrigc * In the free beyond eof case, the file may wind up with 1597153323Srodrigc * more blocks allocated to it than it needs if we crash 1598153323Srodrigc * and that won't get fixed until the next time the file 1599153323Srodrigc * is re-opened and closed but that's ok as that shouldn't 1600153323Srodrigc * be too many blocks. 1601153323Srodrigc * 1602153323Srodrigc * However, we can't just make all wsync xactions run async 1603153323Srodrigc * because there's one call out of the create path that needs 1604153323Srodrigc * to run sync where it's truncating an existing file to size 1605153323Srodrigc * 0 whose size is > 0. 1606153323Srodrigc * 1607153323Srodrigc * It's probably possible to come up with a test in this 1608153323Srodrigc * routine that would correctly distinguish all the above 1609153323Srodrigc * cases from the values of the function parameters and the 1610153323Srodrigc * inode state but for sanity's sake, I've decided to let the 1611153323Srodrigc * layers above just tell us. It's simpler to correctly figure 1612153323Srodrigc * out in the layer above exactly under what conditions we 1613153323Srodrigc * can run async and I think it's easier for others read and 1614153323Srodrigc * follow the logic in case something has to be changed. 1615153323Srodrigc * cscope is your friend -- rcc. 1616153323Srodrigc * 1617153323Srodrigc * The attribute fork is much simpler. 1618153323Srodrigc * 1619153323Srodrigc * For the attribute fork we allow the caller to tell us whether 1620153323Srodrigc * the unlink of the inode that led to this call is yet permanent 1621153323Srodrigc * in the on disk log. If it is not and we will be freeing extents 1622153323Srodrigc * in this inode then we make the first transaction synchronous 1623153323Srodrigc * to make sure that the unlink is permanent by the time we free 1624153323Srodrigc * the blocks. 1625153323Srodrigc */ 1626153323Srodrigc if (fork == XFS_DATA_FORK) { 1627153323Srodrigc if (ip->i_d.di_nextents > 0) { 1628153323Srodrigc ip->i_d.di_size = new_size; 1629153323Srodrigc xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1630153323Srodrigc } 1631153323Srodrigc } else if (sync) { 1632153323Srodrigc ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); 1633153323Srodrigc if (ip->i_d.di_anextents > 0) 1634153323Srodrigc xfs_trans_set_sync(ntp); 1635153323Srodrigc } 1636153323Srodrigc ASSERT(fork == XFS_DATA_FORK || 1637153323Srodrigc (fork == XFS_ATTR_FORK && 1638153323Srodrigc ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) || 1639153323Srodrigc (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC))))); 1640153323Srodrigc 1641153323Srodrigc /* 1642153323Srodrigc * Since it is possible for space to become allocated beyond 1643153323Srodrigc * the end of the file (in a crash where the space is allocated 1644153323Srodrigc * but the inode size is not yet updated), simply remove any 1645153323Srodrigc * blocks which show up between the new EOF and the maximum 1646153323Srodrigc * possible file size. If the first block to be removed is 1647153323Srodrigc * beyond the maximum file size (ie it is the same as last_block), 1648153323Srodrigc * then there is nothing to do. 1649153323Srodrigc */ 1650153323Srodrigc last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 1651153323Srodrigc ASSERT(first_unmap_block <= last_block); 1652153323Srodrigc done = 0; 1653153323Srodrigc if (last_block == first_unmap_block) { 1654153323Srodrigc done = 1; 1655153323Srodrigc } else { 1656153323Srodrigc unmap_len = last_block - first_unmap_block + 1; 1657153323Srodrigc } 1658153323Srodrigc while (!done) { 1659153323Srodrigc /* 1660153323Srodrigc * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi() 1661153323Srodrigc * will tell us whether it freed the entire range or 1662153323Srodrigc * not. If this is a synchronous mount (wsync), 1663153323Srodrigc * then we can tell bunmapi to keep all the 1664153323Srodrigc * transactions asynchronous since the unlink 1665153323Srodrigc * transaction that made this inode inactive has 1666153323Srodrigc * already hit the disk. There's no danger of 1667153323Srodrigc * the freed blocks being reused, there being a 1668153323Srodrigc * crash, and the reused blocks suddenly reappearing 1669153323Srodrigc * in this file with garbage in them once recovery 1670153323Srodrigc * runs. 1671153323Srodrigc */ 1672153323Srodrigc XFS_BMAP_INIT(&free_list, &first_block); 1673159451Srodrigc error = XFS_BUNMAPI(mp, ntp, &ip->i_iocore, 1674159451Srodrigc first_unmap_block, unmap_len, 1675153323Srodrigc XFS_BMAPI_AFLAG(fork) | 1676153323Srodrigc (sync ? 0 : XFS_BMAPI_ASYNC), 1677153323Srodrigc XFS_ITRUNC_MAX_EXTENTS, 1678159451Srodrigc &first_block, &free_list, 1679159451Srodrigc NULL, &done); 1680153323Srodrigc if (error) { 1681153323Srodrigc /* 1682153323Srodrigc * If the bunmapi call encounters an error, 1683153323Srodrigc * return to the caller where the transaction 1684153323Srodrigc * can be properly aborted. We just need to 1685153323Srodrigc * make sure we're not holding any resources 1686153323Srodrigc * that we were not when we came in. 1687153323Srodrigc */ 1688153323Srodrigc xfs_bmap_cancel(&free_list); 1689153323Srodrigc return error; 1690153323Srodrigc } 1691153323Srodrigc 1692153323Srodrigc /* 1693153323Srodrigc * Duplicate the transaction that has the permanent 1694153323Srodrigc * reservation and commit the old transaction. 1695153323Srodrigc */ 1696153323Srodrigc error = xfs_bmap_finish(tp, &free_list, first_block, 1697153323Srodrigc &committed); 1698153323Srodrigc ntp = *tp; 1699153323Srodrigc if (error) { 1700153323Srodrigc /* 1701153323Srodrigc * If the bmap finish call encounters an error, 1702153323Srodrigc * return to the caller where the transaction 1703153323Srodrigc * can be properly aborted. We just need to 1704153323Srodrigc * make sure we're not holding any resources 1705153323Srodrigc * that we were not when we came in. 1706153323Srodrigc * 1707153323Srodrigc * Aborting from this point might lose some 1708153323Srodrigc * blocks in the file system, but oh well. 1709153323Srodrigc */ 1710153323Srodrigc xfs_bmap_cancel(&free_list); 1711153323Srodrigc if (committed) { 1712153323Srodrigc /* 1713153323Srodrigc * If the passed in transaction committed 1714153323Srodrigc * in xfs_bmap_finish(), then we want to 1715153323Srodrigc * add the inode to this one before returning. 1716153323Srodrigc * This keeps things simple for the higher 1717153323Srodrigc * level code, because it always knows that 1718153323Srodrigc * the inode is locked and held in the 1719153323Srodrigc * transaction that returns to it whether 1720153323Srodrigc * errors occur or not. We don't mark the 1721153323Srodrigc * inode dirty so that this transaction can 1722153323Srodrigc * be easily aborted if possible. 1723153323Srodrigc */ 1724153323Srodrigc xfs_trans_ijoin(ntp, ip, 1725153323Srodrigc XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1726153323Srodrigc xfs_trans_ihold(ntp, ip); 1727153323Srodrigc } 1728153323Srodrigc return error; 1729153323Srodrigc } 1730153323Srodrigc 1731153323Srodrigc if (committed) { 1732153323Srodrigc /* 1733153323Srodrigc * The first xact was committed, 1734153323Srodrigc * so add the inode to the new one. 1735153323Srodrigc * Mark it dirty so it will be logged 1736153323Srodrigc * and moved forward in the log as 1737153323Srodrigc * part of every commit. 1738153323Srodrigc */ 1739153323Srodrigc xfs_trans_ijoin(ntp, ip, 1740153323Srodrigc XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1741153323Srodrigc xfs_trans_ihold(ntp, ip); 1742153323Srodrigc xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1743153323Srodrigc } 1744153323Srodrigc ntp = xfs_trans_dup(ntp); 1745153323Srodrigc (void) xfs_trans_commit(*tp, 0, NULL); 1746153323Srodrigc *tp = ntp; 1747153323Srodrigc error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 1748153323Srodrigc XFS_TRANS_PERM_LOG_RES, 1749153323Srodrigc XFS_ITRUNCATE_LOG_COUNT); 1750153323Srodrigc /* 1751153323Srodrigc * Add the inode being truncated to the next chained 1752153323Srodrigc * transaction. 1753153323Srodrigc */ 1754153323Srodrigc xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1755153323Srodrigc xfs_trans_ihold(ntp, ip); 1756153323Srodrigc if (error) 1757153323Srodrigc return (error); 1758153323Srodrigc } 1759153323Srodrigc /* 1760153323Srodrigc * Only update the size in the case of the data fork, but 1761153323Srodrigc * always re-log the inode so that our permanent transaction 1762153323Srodrigc * can keep on rolling it forward in the log. 1763153323Srodrigc */ 1764153323Srodrigc if (fork == XFS_DATA_FORK) { 1765153323Srodrigc xfs_isize_check(mp, ip, new_size); 1766153323Srodrigc ip->i_d.di_size = new_size; 1767153323Srodrigc } 1768153323Srodrigc xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1769153323Srodrigc ASSERT((new_size != 0) || 1770153323Srodrigc (fork == XFS_ATTR_FORK) || 1771153323Srodrigc (ip->i_delayed_blks == 0)); 1772153323Srodrigc ASSERT((new_size != 0) || 1773153323Srodrigc (fork == XFS_ATTR_FORK) || 1774153323Srodrigc (ip->i_d.di_nextents == 0)); 1775153323Srodrigc xfs_itrunc_trace(XFS_ITRUNC_FINISH2, ip, 0, new_size, 0, 0); 1776153323Srodrigc return 0; 1777153323Srodrigc} 1778153323Srodrigc 1779153323Srodrigc 1780153323Srodrigc/* 1781153323Srodrigc * xfs_igrow_start 1782153323Srodrigc * 1783153323Srodrigc * Do the first part of growing a file: zero any data in the last 1784153323Srodrigc * block that is beyond the old EOF. We need to do this before 1785153323Srodrigc * the inode is joined to the transaction to modify the i_size. 1786153323Srodrigc * That way we can drop the inode lock and call into the buffer 1787153323Srodrigc * cache to get the buffer mapping the EOF. 1788153323Srodrigc */ 1789153323Srodrigcint 1790153323Srodrigcxfs_igrow_start( 1791153323Srodrigc xfs_inode_t *ip, 1792153323Srodrigc xfs_fsize_t new_size, 1793153323Srodrigc cred_t *credp) 1794153323Srodrigc{ 1795153323Srodrigc int error; 1796153323Srodrigc 1797153323Srodrigc ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1798153323Srodrigc ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); 1799153323Srodrigc ASSERT(new_size > ip->i_d.di_size); 1800153323Srodrigc 1801153323Srodrigc /* 1802153323Srodrigc * Zero any pages that may have been created by 1803153323Srodrigc * xfs_write_file() beyond the end of the file 1804153323Srodrigc * and any blocks between the old and new file sizes. 1805153323Srodrigc */ 1806159451Srodrigc error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, 1807159451Srodrigc ip->i_d.di_size, new_size); 1808153323Srodrigc return error; 1809153323Srodrigc} 1810153323Srodrigc 1811153323Srodrigc/* 1812153323Srodrigc * xfs_igrow_finish 1813153323Srodrigc * 1814153323Srodrigc * This routine is called to extend the size of a file. 1815153323Srodrigc * The inode must have both the iolock and the ilock locked 1816153323Srodrigc * for update and it must be a part of the current transaction. 1817153323Srodrigc * The xfs_igrow_start() function must have been called previously. 1818153323Srodrigc * If the change_flag is not zero, the inode change timestamp will 1819153323Srodrigc * be updated. 1820153323Srodrigc */ 1821153323Srodrigcvoid 1822153323Srodrigcxfs_igrow_finish( 1823153323Srodrigc xfs_trans_t *tp, 1824153323Srodrigc xfs_inode_t *ip, 1825153323Srodrigc xfs_fsize_t new_size, 1826153323Srodrigc int change_flag) 1827153323Srodrigc{ 1828153323Srodrigc ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1829153323Srodrigc ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); 1830153323Srodrigc ASSERT(ip->i_transp == tp); 1831153323Srodrigc ASSERT(new_size > ip->i_d.di_size); 1832153323Srodrigc 1833153323Srodrigc /* 1834153323Srodrigc * Update the file size. Update the inode change timestamp 1835153323Srodrigc * if change_flag set. 1836153323Srodrigc */ 1837153323Srodrigc ip->i_d.di_size = new_size; 1838153323Srodrigc if (change_flag) 1839153323Srodrigc xfs_ichgtime(ip, XFS_ICHGTIME_CHG); 1840153323Srodrigc xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1841153323Srodrigc 1842153323Srodrigc} 1843153323Srodrigc 1844153323Srodrigc 1845153323Srodrigc/* 1846153323Srodrigc * This is called when the inode's link count goes to 0. 1847153323Srodrigc * We place the on-disk inode on a list in the AGI. It 1848153323Srodrigc * will be pulled from this list when the inode is freed. 1849153323Srodrigc */ 1850153323Srodrigcint 1851153323Srodrigcxfs_iunlink( 1852153323Srodrigc xfs_trans_t *tp, 1853153323Srodrigc xfs_inode_t *ip) 1854153323Srodrigc{ 1855153323Srodrigc xfs_mount_t *mp; 1856153323Srodrigc xfs_agi_t *agi; 1857153323Srodrigc xfs_dinode_t *dip; 1858153323Srodrigc xfs_buf_t *agibp; 1859153323Srodrigc xfs_buf_t *ibp; 1860153323Srodrigc xfs_agnumber_t agno; 1861153323Srodrigc xfs_daddr_t agdaddr; 1862153323Srodrigc xfs_agino_t agino; 1863153323Srodrigc short bucket_index; 1864153323Srodrigc int offset; 1865153323Srodrigc int error; 1866153323Srodrigc int agi_ok; 1867153323Srodrigc 1868153323Srodrigc ASSERT(ip->i_d.di_nlink == 0); 1869153323Srodrigc ASSERT(ip->i_d.di_mode != 0); 1870153323Srodrigc ASSERT(ip->i_transp == tp); 1871153323Srodrigc 1872153323Srodrigc mp = tp->t_mountp; 1873153323Srodrigc 1874153323Srodrigc agno = XFS_INO_TO_AGNO(mp, ip->i_ino); 1875153323Srodrigc agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)); 1876153323Srodrigc 1877153323Srodrigc /* 1878153323Srodrigc * Get the agi buffer first. It ensures lock ordering 1879153323Srodrigc * on the list. 1880153323Srodrigc */ 1881153323Srodrigc error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr, 1882153323Srodrigc XFS_FSS_TO_BB(mp, 1), 0, &agibp); 1883153323Srodrigc if (error) { 1884153323Srodrigc return error; 1885153323Srodrigc } 1886153323Srodrigc /* 1887153323Srodrigc * Validate the magic number of the agi block. 1888153323Srodrigc */ 1889153323Srodrigc agi = XFS_BUF_TO_AGI(agibp); 1890153323Srodrigc agi_ok = 1891159451Srodrigc be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && 1892159451Srodrigc XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)); 1893153323Srodrigc if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK, 1894153323Srodrigc XFS_RANDOM_IUNLINK))) { 1895153323Srodrigc XFS_CORRUPTION_ERROR("xfs_iunlink", XFS_ERRLEVEL_LOW, mp, agi); 1896153323Srodrigc xfs_trans_brelse(tp, agibp); 1897153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 1898153323Srodrigc } 1899153323Srodrigc /* 1900153323Srodrigc * Get the index into the agi hash table for the 1901153323Srodrigc * list this inode will go on. 1902153323Srodrigc */ 1903153323Srodrigc agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 1904153323Srodrigc ASSERT(agino != 0); 1905153323Srodrigc bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 1906159451Srodrigc ASSERT(agi->agi_unlinked[bucket_index]); 1907159451Srodrigc ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); 1908153323Srodrigc 1909159451Srodrigc if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) { 1910153323Srodrigc /* 1911153323Srodrigc * There is already another inode in the bucket we need 1912153323Srodrigc * to add ourselves to. Add us at the front of the list. 1913153323Srodrigc * Here we put the head pointer into our next pointer, 1914153323Srodrigc * and then we fall through to point the head at us. 1915153323Srodrigc */ 1916159451Srodrigc error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); 1917153323Srodrigc if (error) { 1918153323Srodrigc return error; 1919153323Srodrigc } 1920153323Srodrigc ASSERT(INT_GET(dip->di_next_unlinked, ARCH_CONVERT) == NULLAGINO); 1921159451Srodrigc ASSERT(dip->di_next_unlinked); 1922153323Srodrigc /* both on-disk, don't endian flip twice */ 1923153323Srodrigc dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; 1924153323Srodrigc offset = ip->i_boffset + 1925153323Srodrigc offsetof(xfs_dinode_t, di_next_unlinked); 1926153323Srodrigc xfs_trans_inode_buf(tp, ibp); 1927153323Srodrigc xfs_trans_log_buf(tp, ibp, offset, 1928153323Srodrigc (offset + sizeof(xfs_agino_t) - 1)); 1929153323Srodrigc xfs_inobp_check(mp, ibp); 1930153323Srodrigc } 1931153323Srodrigc 1932153323Srodrigc /* 1933153323Srodrigc * Point the bucket head pointer at the inode being inserted. 1934153323Srodrigc */ 1935153323Srodrigc ASSERT(agino != 0); 1936159451Srodrigc agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); 1937153323Srodrigc offset = offsetof(xfs_agi_t, agi_unlinked) + 1938153323Srodrigc (sizeof(xfs_agino_t) * bucket_index); 1939153323Srodrigc xfs_trans_log_buf(tp, agibp, offset, 1940153323Srodrigc (offset + sizeof(xfs_agino_t) - 1)); 1941153323Srodrigc return 0; 1942153323Srodrigc} 1943153323Srodrigc 1944153323Srodrigc/* 1945153323Srodrigc * Pull the on-disk inode from the AGI unlinked list. 1946153323Srodrigc */ 1947153323SrodrigcSTATIC int 1948153323Srodrigcxfs_iunlink_remove( 1949153323Srodrigc xfs_trans_t *tp, 1950153323Srodrigc xfs_inode_t *ip) 1951153323Srodrigc{ 1952153323Srodrigc xfs_ino_t next_ino; 1953153323Srodrigc xfs_mount_t *mp; 1954153323Srodrigc xfs_agi_t *agi; 1955153323Srodrigc xfs_dinode_t *dip; 1956153323Srodrigc xfs_buf_t *agibp; 1957153323Srodrigc xfs_buf_t *ibp; 1958153323Srodrigc xfs_agnumber_t agno; 1959153323Srodrigc xfs_daddr_t agdaddr; 1960153323Srodrigc xfs_agino_t agino; 1961153323Srodrigc xfs_agino_t next_agino; 1962153323Srodrigc xfs_buf_t *last_ibp; 1963170124Skan xfs_dinode_t *last_dip = NULL; 1964153323Srodrigc short bucket_index; 1965170124Skan int offset, last_offset = 0; 1966153323Srodrigc int error; 1967153323Srodrigc int agi_ok; 1968153323Srodrigc 1969153323Srodrigc /* 1970153323Srodrigc * First pull the on-disk inode from the AGI unlinked list. 1971153323Srodrigc */ 1972153323Srodrigc mp = tp->t_mountp; 1973153323Srodrigc 1974153323Srodrigc agno = XFS_INO_TO_AGNO(mp, ip->i_ino); 1975153323Srodrigc agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)); 1976153323Srodrigc 1977153323Srodrigc /* 1978153323Srodrigc * Get the agi buffer first. It ensures lock ordering 1979153323Srodrigc * on the list. 1980153323Srodrigc */ 1981153323Srodrigc error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr, 1982153323Srodrigc XFS_FSS_TO_BB(mp, 1), 0, &agibp); 1983153323Srodrigc if (error) { 1984153323Srodrigc cmn_err(CE_WARN, 1985153323Srodrigc "xfs_iunlink_remove: xfs_trans_read_buf() returned an error %d on %s. Returning error.", 1986153323Srodrigc error, mp->m_fsname); 1987153323Srodrigc return error; 1988153323Srodrigc } 1989153323Srodrigc /* 1990153323Srodrigc * Validate the magic number of the agi block. 1991153323Srodrigc */ 1992153323Srodrigc agi = XFS_BUF_TO_AGI(agibp); 1993153323Srodrigc agi_ok = 1994159451Srodrigc be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && 1995159451Srodrigc XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)); 1996153323Srodrigc if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK_REMOVE, 1997153323Srodrigc XFS_RANDOM_IUNLINK_REMOVE))) { 1998153323Srodrigc XFS_CORRUPTION_ERROR("xfs_iunlink_remove", XFS_ERRLEVEL_LOW, 1999153323Srodrigc mp, agi); 2000153323Srodrigc xfs_trans_brelse(tp, agibp); 2001153323Srodrigc cmn_err(CE_WARN, 2002153323Srodrigc "xfs_iunlink_remove: XFS_TEST_ERROR() returned an error on %s. Returning EFSCORRUPTED.", 2003153323Srodrigc mp->m_fsname); 2004153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 2005153323Srodrigc } 2006153323Srodrigc /* 2007153323Srodrigc * Get the index into the agi hash table for the 2008153323Srodrigc * list this inode will go on. 2009153323Srodrigc */ 2010153323Srodrigc agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 2011153323Srodrigc ASSERT(agino != 0); 2012153323Srodrigc bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 2013159451Srodrigc ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO); 2014159451Srodrigc ASSERT(agi->agi_unlinked[bucket_index]); 2015153323Srodrigc 2016159451Srodrigc if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { 2017153323Srodrigc /* 2018153323Srodrigc * We're at the head of the list. Get the inode's 2019153323Srodrigc * on-disk buffer to see if there is anyone after us 2020153323Srodrigc * on the list. Only modify our next pointer if it 2021153323Srodrigc * is not already NULLAGINO. This saves us the overhead 2022153323Srodrigc * of dealing with the buffer when there is no need to 2023153323Srodrigc * change it. 2024153323Srodrigc */ 2025159451Srodrigc error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); 2026153323Srodrigc if (error) { 2027153323Srodrigc cmn_err(CE_WARN, 2028153323Srodrigc "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 2029153323Srodrigc error, mp->m_fsname); 2030153323Srodrigc return error; 2031153323Srodrigc } 2032153323Srodrigc next_agino = INT_GET(dip->di_next_unlinked, ARCH_CONVERT); 2033153323Srodrigc ASSERT(next_agino != 0); 2034153323Srodrigc if (next_agino != NULLAGINO) { 2035153323Srodrigc INT_SET(dip->di_next_unlinked, ARCH_CONVERT, NULLAGINO); 2036153323Srodrigc offset = ip->i_boffset + 2037153323Srodrigc offsetof(xfs_dinode_t, di_next_unlinked); 2038153323Srodrigc xfs_trans_inode_buf(tp, ibp); 2039153323Srodrigc xfs_trans_log_buf(tp, ibp, offset, 2040153323Srodrigc (offset + sizeof(xfs_agino_t) - 1)); 2041153323Srodrigc xfs_inobp_check(mp, ibp); 2042153323Srodrigc } else { 2043153323Srodrigc xfs_trans_brelse(tp, ibp); 2044153323Srodrigc } 2045153323Srodrigc /* 2046153323Srodrigc * Point the bucket head pointer at the next inode. 2047153323Srodrigc */ 2048153323Srodrigc ASSERT(next_agino != 0); 2049153323Srodrigc ASSERT(next_agino != agino); 2050159451Srodrigc agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); 2051153323Srodrigc offset = offsetof(xfs_agi_t, agi_unlinked) + 2052153323Srodrigc (sizeof(xfs_agino_t) * bucket_index); 2053153323Srodrigc xfs_trans_log_buf(tp, agibp, offset, 2054153323Srodrigc (offset + sizeof(xfs_agino_t) - 1)); 2055153323Srodrigc } else { 2056153323Srodrigc /* 2057153323Srodrigc * We need to search the list for the inode being freed. 2058153323Srodrigc */ 2059159451Srodrigc next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 2060153323Srodrigc last_ibp = NULL; 2061153323Srodrigc while (next_agino != agino) { 2062153323Srodrigc /* 2063153323Srodrigc * If the last inode wasn't the one pointing to 2064153323Srodrigc * us, then release its buffer since we're not 2065153323Srodrigc * going to do anything with it. 2066153323Srodrigc */ 2067153323Srodrigc if (last_ibp != NULL) { 2068153323Srodrigc xfs_trans_brelse(tp, last_ibp); 2069153323Srodrigc } 2070153323Srodrigc next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); 2071153323Srodrigc error = xfs_inotobp(mp, tp, next_ino, &last_dip, 2072153323Srodrigc &last_ibp, &last_offset); 2073153323Srodrigc if (error) { 2074153323Srodrigc cmn_err(CE_WARN, 2075153323Srodrigc "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.", 2076153323Srodrigc error, mp->m_fsname); 2077153323Srodrigc return error; 2078153323Srodrigc } 2079153323Srodrigc next_agino = INT_GET(last_dip->di_next_unlinked, ARCH_CONVERT); 2080153323Srodrigc ASSERT(next_agino != NULLAGINO); 2081153323Srodrigc ASSERT(next_agino != 0); 2082153323Srodrigc } 2083153323Srodrigc /* 2084153323Srodrigc * Now last_ibp points to the buffer previous to us on 2085153323Srodrigc * the unlinked list. Pull us from the list. 2086153323Srodrigc */ 2087159451Srodrigc error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); 2088153323Srodrigc if (error) { 2089153323Srodrigc cmn_err(CE_WARN, 2090153323Srodrigc "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 2091153323Srodrigc error, mp->m_fsname); 2092153323Srodrigc return error; 2093153323Srodrigc } 2094153323Srodrigc next_agino = INT_GET(dip->di_next_unlinked, ARCH_CONVERT); 2095153323Srodrigc ASSERT(next_agino != 0); 2096153323Srodrigc ASSERT(next_agino != agino); 2097153323Srodrigc if (next_agino != NULLAGINO) { 2098153323Srodrigc INT_SET(dip->di_next_unlinked, ARCH_CONVERT, NULLAGINO); 2099153323Srodrigc offset = ip->i_boffset + 2100153323Srodrigc offsetof(xfs_dinode_t, di_next_unlinked); 2101153323Srodrigc xfs_trans_inode_buf(tp, ibp); 2102153323Srodrigc xfs_trans_log_buf(tp, ibp, offset, 2103153323Srodrigc (offset + sizeof(xfs_agino_t) - 1)); 2104153323Srodrigc xfs_inobp_check(mp, ibp); 2105153323Srodrigc } else { 2106153323Srodrigc xfs_trans_brelse(tp, ibp); 2107153323Srodrigc } 2108153323Srodrigc /* 2109153323Srodrigc * Point the previous inode on the list to the next inode. 2110153323Srodrigc */ 2111153323Srodrigc INT_SET(last_dip->di_next_unlinked, ARCH_CONVERT, next_agino); 2112153323Srodrigc ASSERT(next_agino != 0); 2113153323Srodrigc offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked); 2114153323Srodrigc xfs_trans_inode_buf(tp, last_ibp); 2115153323Srodrigc xfs_trans_log_buf(tp, last_ibp, offset, 2116153323Srodrigc (offset + sizeof(xfs_agino_t) - 1)); 2117153323Srodrigc xfs_inobp_check(mp, last_ibp); 2118153323Srodrigc } 2119153323Srodrigc return 0; 2120153323Srodrigc} 2121153323Srodrigc 2122153323Srodrigcstatic __inline__ int xfs_inode_clean(xfs_inode_t *ip) 2123153323Srodrigc{ 2124153323Srodrigc return (((ip->i_itemp == NULL) || 2125153323Srodrigc !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && 2126153323Srodrigc (ip->i_update_core == 0)); 2127153323Srodrigc} 2128153323Srodrigc 2129153323SrodrigcSTATIC void 2130153323Srodrigcxfs_ifree_cluster( 2131153323Srodrigc xfs_inode_t *free_ip, 2132153323Srodrigc xfs_trans_t *tp, 2133153323Srodrigc xfs_ino_t inum) 2134153323Srodrigc{ 2135153323Srodrigc xfs_mount_t *mp = free_ip->i_mount; 2136153323Srodrigc int blks_per_cluster; 2137153323Srodrigc int nbufs; 2138153323Srodrigc int ninodes; 2139153323Srodrigc int i, j, found, pre_flushed; 2140153323Srodrigc xfs_daddr_t blkno; 2141153323Srodrigc xfs_buf_t *bp; 2142153323Srodrigc xfs_ihash_t *ih; 2143153323Srodrigc xfs_inode_t *ip, **ip_found; 2144153323Srodrigc xfs_inode_log_item_t *iip; 2145153323Srodrigc xfs_log_item_t *lip; 2146153323Srodrigc SPLDECL(s); 2147153323Srodrigc 2148153323Srodrigc if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 2149153323Srodrigc blks_per_cluster = 1; 2150153323Srodrigc ninodes = mp->m_sb.sb_inopblock; 2151153323Srodrigc nbufs = XFS_IALLOC_BLOCKS(mp); 2152153323Srodrigc } else { 2153153323Srodrigc blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / 2154153323Srodrigc mp->m_sb.sb_blocksize; 2155153323Srodrigc ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; 2156153323Srodrigc nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; 2157153323Srodrigc } 2158153323Srodrigc 2159153323Srodrigc ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS); 2160153323Srodrigc 2161153323Srodrigc for (j = 0; j < nbufs; j++, inum += ninodes) { 2162153323Srodrigc blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 2163153323Srodrigc XFS_INO_TO_AGBNO(mp, inum)); 2164153323Srodrigc 2165153323Srodrigc 2166153323Srodrigc /* 2167153323Srodrigc * Look for each inode in memory and attempt to lock it, 2168153323Srodrigc * we can be racing with flush and tail pushing here. 2169153323Srodrigc * any inode we get the locks on, add to an array of 2170153323Srodrigc * inode items to process later. 2171153323Srodrigc * 2172153323Srodrigc * The get the buffer lock, we could beat a flush 2173153323Srodrigc * or tail pushing thread to the lock here, in which 2174153323Srodrigc * case they will go looking for the inode buffer 2175153323Srodrigc * and fail, we need some other form of interlock 2176153323Srodrigc * here. 2177153323Srodrigc */ 2178153323Srodrigc found = 0; 2179153323Srodrigc for (i = 0; i < ninodes; i++) { 2180153323Srodrigc ih = XFS_IHASH(mp, inum + i); 2181153323Srodrigc read_lock(&ih->ih_lock); 2182153323Srodrigc for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) { 2183153323Srodrigc if (ip->i_ino == inum + i) 2184153323Srodrigc break; 2185153323Srodrigc } 2186153323Srodrigc 2187153323Srodrigc /* Inode not in memory or we found it already, 2188153323Srodrigc * nothing to do 2189153323Srodrigc */ 2190153323Srodrigc if (!ip || (ip->i_flags & XFS_ISTALE)) { 2191153323Srodrigc read_unlock(&ih->ih_lock); 2192153323Srodrigc continue; 2193153323Srodrigc } 2194153323Srodrigc 2195153323Srodrigc if (xfs_inode_clean(ip)) { 2196153323Srodrigc read_unlock(&ih->ih_lock); 2197153323Srodrigc continue; 2198153323Srodrigc } 2199153323Srodrigc 2200153323Srodrigc /* If we can get the locks then add it to the 2201153323Srodrigc * list, otherwise by the time we get the bp lock 2202153323Srodrigc * below it will already be attached to the 2203153323Srodrigc * inode buffer. 2204153323Srodrigc */ 2205153323Srodrigc 2206153323Srodrigc /* This inode will already be locked - by us, lets 2207153323Srodrigc * keep it that way. 2208153323Srodrigc */ 2209153323Srodrigc 2210153323Srodrigc if (ip == free_ip) { 2211153323Srodrigc if (xfs_iflock_nowait(ip)) { 2212153323Srodrigc ip->i_flags |= XFS_ISTALE; 2213153323Srodrigc 2214153323Srodrigc if (xfs_inode_clean(ip)) { 2215153323Srodrigc xfs_ifunlock(ip); 2216153323Srodrigc } else { 2217153323Srodrigc ip_found[found++] = ip; 2218153323Srodrigc } 2219153323Srodrigc } 2220153323Srodrigc read_unlock(&ih->ih_lock); 2221153323Srodrigc continue; 2222153323Srodrigc } 2223153323Srodrigc 2224153323Srodrigc if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2225153323Srodrigc if (xfs_iflock_nowait(ip)) { 2226153323Srodrigc ip->i_flags |= XFS_ISTALE; 2227153323Srodrigc 2228153323Srodrigc if (xfs_inode_clean(ip)) { 2229153323Srodrigc xfs_ifunlock(ip); 2230153323Srodrigc xfs_iunlock(ip, XFS_ILOCK_EXCL); 2231153323Srodrigc } else { 2232153323Srodrigc ip_found[found++] = ip; 2233153323Srodrigc } 2234153323Srodrigc } else { 2235153323Srodrigc xfs_iunlock(ip, XFS_ILOCK_EXCL); 2236153323Srodrigc } 2237153323Srodrigc } 2238153323Srodrigc 2239153323Srodrigc read_unlock(&ih->ih_lock); 2240153323Srodrigc } 2241153323Srodrigc 2242153323Srodrigc bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 2243153323Srodrigc mp->m_bsize * blks_per_cluster, 2244153323Srodrigc XFS_BUF_LOCK); 2245153323Srodrigc 2246153323Srodrigc pre_flushed = 0; 2247153323Srodrigc lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 2248153323Srodrigc while (lip) { 2249153323Srodrigc if (lip->li_type == XFS_LI_INODE) { 2250153323Srodrigc iip = (xfs_inode_log_item_t *)lip; 2251153323Srodrigc ASSERT(iip->ili_logged == 1); 2252153323Srodrigc lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; 2253153323Srodrigc AIL_LOCK(mp,s); 2254153323Srodrigc iip->ili_flush_lsn = iip->ili_item.li_lsn; 2255153323Srodrigc AIL_UNLOCK(mp, s); 2256153323Srodrigc iip->ili_inode->i_flags |= XFS_ISTALE; 2257153323Srodrigc pre_flushed++; 2258153323Srodrigc } 2259153323Srodrigc lip = lip->li_bio_list; 2260153323Srodrigc } 2261153323Srodrigc 2262153323Srodrigc for (i = 0; i < found; i++) { 2263153323Srodrigc ip = ip_found[i]; 2264153323Srodrigc iip = ip->i_itemp; 2265153323Srodrigc 2266153323Srodrigc if (!iip) { 2267153323Srodrigc ip->i_update_core = 0; 2268153323Srodrigc xfs_ifunlock(ip); 2269153323Srodrigc xfs_iunlock(ip, XFS_ILOCK_EXCL); 2270153323Srodrigc continue; 2271153323Srodrigc } 2272153323Srodrigc 2273153323Srodrigc iip->ili_last_fields = iip->ili_format.ilf_fields; 2274153323Srodrigc iip->ili_format.ilf_fields = 0; 2275153323Srodrigc iip->ili_logged = 1; 2276153323Srodrigc AIL_LOCK(mp,s); 2277153323Srodrigc iip->ili_flush_lsn = iip->ili_item.li_lsn; 2278153323Srodrigc AIL_UNLOCK(mp, s); 2279153323Srodrigc 2280153323Srodrigc xfs_buf_attach_iodone(bp, 2281153323Srodrigc (void(*)(xfs_buf_t*,xfs_log_item_t*)) 2282153323Srodrigc xfs_istale_done, (xfs_log_item_t *)iip); 2283153323Srodrigc if (ip != free_ip) { 2284153323Srodrigc xfs_iunlock(ip, XFS_ILOCK_EXCL); 2285153323Srodrigc } 2286153323Srodrigc } 2287153323Srodrigc 2288153323Srodrigc if (found || pre_flushed) 2289153323Srodrigc xfs_trans_stale_inode_buf(tp, bp); 2290153323Srodrigc xfs_trans_binval(tp, bp); 2291153323Srodrigc } 2292153323Srodrigc 2293153323Srodrigc kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *)); 2294153323Srodrigc} 2295153323Srodrigc 2296153323Srodrigc/* 2297153323Srodrigc * This is called to return an inode to the inode free list. 2298153323Srodrigc * The inode should already be truncated to 0 length and have 2299153323Srodrigc * no pages associated with it. This routine also assumes that 2300153323Srodrigc * the inode is already a part of the transaction. 2301153323Srodrigc * 2302153323Srodrigc * The on-disk copy of the inode will have been added to the list 2303153323Srodrigc * of unlinked inodes in the AGI. We need to remove the inode from 2304153323Srodrigc * that list atomically with respect to freeing it here. 2305153323Srodrigc */ 2306153323Srodrigcint 2307153323Srodrigcxfs_ifree( 2308153323Srodrigc xfs_trans_t *tp, 2309153323Srodrigc xfs_inode_t *ip, 2310153323Srodrigc xfs_bmap_free_t *flist) 2311153323Srodrigc{ 2312153323Srodrigc int error; 2313153323Srodrigc int delete; 2314153323Srodrigc xfs_ino_t first_ino; 2315153323Srodrigc 2316153323Srodrigc ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 2317153323Srodrigc ASSERT(ip->i_transp == tp); 2318153323Srodrigc ASSERT(ip->i_d.di_nlink == 0); 2319153323Srodrigc ASSERT(ip->i_d.di_nextents == 0); 2320153323Srodrigc ASSERT(ip->i_d.di_anextents == 0); 2321153323Srodrigc ASSERT((ip->i_d.di_size == 0) || 2322153323Srodrigc ((ip->i_d.di_mode & S_IFMT) != S_IFREG)); 2323153323Srodrigc ASSERT(ip->i_d.di_nblocks == 0); 2324153323Srodrigc 2325153323Srodrigc /* 2326153323Srodrigc * Pull the on-disk inode from the AGI unlinked list. 2327153323Srodrigc */ 2328153323Srodrigc error = xfs_iunlink_remove(tp, ip); 2329153323Srodrigc if (error != 0) { 2330153323Srodrigc return error; 2331153323Srodrigc } 2332153323Srodrigc 2333153323Srodrigc error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); 2334153323Srodrigc if (error != 0) { 2335153323Srodrigc return error; 2336153323Srodrigc } 2337153323Srodrigc ip->i_d.di_mode = 0; /* mark incore inode as free */ 2338153323Srodrigc ip->i_d.di_flags = 0; 2339153323Srodrigc ip->i_d.di_dmevmask = 0; 2340153323Srodrigc ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 2341153323Srodrigc ip->i_df.if_ext_max = 2342153323Srodrigc XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 2343153323Srodrigc ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 2344153323Srodrigc ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 2345153323Srodrigc /* 2346153323Srodrigc * Bump the generation count so no one will be confused 2347153323Srodrigc * by reincarnations of this inode. 2348153323Srodrigc */ 2349153323Srodrigc ip->i_d.di_gen++; 2350153323Srodrigc xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2351153323Srodrigc 2352153323Srodrigc if (delete) { 2353153323Srodrigc xfs_ifree_cluster(ip, tp, first_ino); 2354153323Srodrigc } 2355153323Srodrigc 2356153323Srodrigc return 0; 2357153323Srodrigc} 2358153323Srodrigc 2359153323Srodrigc/* 2360153323Srodrigc * Reallocate the space for if_broot based on the number of records 2361153323Srodrigc * being added or deleted as indicated in rec_diff. Move the records 2362153323Srodrigc * and pointers in if_broot to fit the new size. When shrinking this 2363153323Srodrigc * will eliminate holes between the records and pointers created by 2364153323Srodrigc * the caller. When growing this will create holes to be filled in 2365153323Srodrigc * by the caller. 2366153323Srodrigc * 2367153323Srodrigc * The caller must not request to add more records than would fit in 2368153323Srodrigc * the on-disk inode root. If the if_broot is currently NULL, then 2369153323Srodrigc * if we adding records one will be allocated. The caller must also 2370153323Srodrigc * not request that the number of records go below zero, although 2371153323Srodrigc * it can go to zero. 2372153323Srodrigc * 2373153323Srodrigc * ip -- the inode whose if_broot area is changing 2374153323Srodrigc * ext_diff -- the change in the number of records, positive or negative, 2375153323Srodrigc * requested for the if_broot array. 2376153323Srodrigc */ 2377153323Srodrigcvoid 2378153323Srodrigcxfs_iroot_realloc( 2379153323Srodrigc xfs_inode_t *ip, 2380153323Srodrigc int rec_diff, 2381153323Srodrigc int whichfork) 2382153323Srodrigc{ 2383153323Srodrigc int cur_max; 2384153323Srodrigc xfs_ifork_t *ifp; 2385153323Srodrigc xfs_bmbt_block_t *new_broot; 2386153323Srodrigc int new_max; 2387153323Srodrigc size_t new_size; 2388153323Srodrigc char *np; 2389153323Srodrigc char *op; 2390153323Srodrigc 2391153323Srodrigc /* 2392153323Srodrigc * Handle the degenerate case quietly. 2393153323Srodrigc */ 2394153323Srodrigc if (rec_diff == 0) { 2395153323Srodrigc return; 2396153323Srodrigc } 2397153323Srodrigc 2398153323Srodrigc ifp = XFS_IFORK_PTR(ip, whichfork); 2399153323Srodrigc if (rec_diff > 0) { 2400153323Srodrigc /* 2401153323Srodrigc * If there wasn't any memory allocated before, just 2402153323Srodrigc * allocate it now and get out. 2403153323Srodrigc */ 2404153323Srodrigc if (ifp->if_broot_bytes == 0) { 2405153323Srodrigc new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); 2406153323Srodrigc ifp->if_broot = (xfs_bmbt_block_t*)kmem_alloc(new_size, 2407153323Srodrigc KM_SLEEP); 2408153323Srodrigc ifp->if_broot_bytes = (int)new_size; 2409153323Srodrigc return; 2410153323Srodrigc } 2411153323Srodrigc 2412153323Srodrigc /* 2413153323Srodrigc * If there is already an existing if_broot, then we need 2414153323Srodrigc * to realloc() it and shift the pointers to their new 2415153323Srodrigc * location. The records don't change location because 2416153323Srodrigc * they are kept butted up against the btree block header. 2417153323Srodrigc */ 2418153323Srodrigc cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes); 2419153323Srodrigc new_max = cur_max + rec_diff; 2420153323Srodrigc new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 2421153323Srodrigc ifp->if_broot = (xfs_bmbt_block_t *) 2422153323Srodrigc kmem_realloc(ifp->if_broot, 2423153323Srodrigc new_size, 2424153323Srodrigc (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ 2425153323Srodrigc KM_SLEEP); 2426153323Srodrigc op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, 2427153323Srodrigc ifp->if_broot_bytes); 2428153323Srodrigc np = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, 2429153323Srodrigc (int)new_size); 2430153323Srodrigc ifp->if_broot_bytes = (int)new_size; 2431153323Srodrigc ASSERT(ifp->if_broot_bytes <= 2432153323Srodrigc XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 2433153323Srodrigc memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); 2434153323Srodrigc return; 2435153323Srodrigc } 2436153323Srodrigc 2437153323Srodrigc /* 2438153323Srodrigc * rec_diff is less than 0. In this case, we are shrinking the 2439153323Srodrigc * if_broot buffer. It must already exist. If we go to zero 2440153323Srodrigc * records, just get rid of the root and clear the status bit. 2441153323Srodrigc */ 2442153323Srodrigc ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); 2443153323Srodrigc cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes); 2444153323Srodrigc new_max = cur_max + rec_diff; 2445153323Srodrigc ASSERT(new_max >= 0); 2446153323Srodrigc if (new_max > 0) 2447153323Srodrigc new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 2448153323Srodrigc else 2449153323Srodrigc new_size = 0; 2450153323Srodrigc if (new_size > 0) { 2451153323Srodrigc new_broot = (xfs_bmbt_block_t *)kmem_alloc(new_size, KM_SLEEP); 2452153323Srodrigc /* 2453153323Srodrigc * First copy over the btree block header. 2454153323Srodrigc */ 2455153323Srodrigc memcpy(new_broot, ifp->if_broot, sizeof(xfs_bmbt_block_t)); 2456153323Srodrigc } else { 2457153323Srodrigc new_broot = NULL; 2458153323Srodrigc ifp->if_flags &= ~XFS_IFBROOT; 2459153323Srodrigc } 2460153323Srodrigc 2461153323Srodrigc /* 2462153323Srodrigc * Only copy the records and pointers if there are any. 2463153323Srodrigc */ 2464153323Srodrigc if (new_max > 0) { 2465153323Srodrigc /* 2466153323Srodrigc * First copy the records. 2467153323Srodrigc */ 2468153323Srodrigc op = (char *)XFS_BMAP_BROOT_REC_ADDR(ifp->if_broot, 1, 2469153323Srodrigc ifp->if_broot_bytes); 2470153323Srodrigc np = (char *)XFS_BMAP_BROOT_REC_ADDR(new_broot, 1, 2471153323Srodrigc (int)new_size); 2472153323Srodrigc memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); 2473153323Srodrigc 2474153323Srodrigc /* 2475153323Srodrigc * Then copy the pointers. 2476153323Srodrigc */ 2477153323Srodrigc op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, 2478153323Srodrigc ifp->if_broot_bytes); 2479153323Srodrigc np = (char *)XFS_BMAP_BROOT_PTR_ADDR(new_broot, 1, 2480153323Srodrigc (int)new_size); 2481153323Srodrigc memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); 2482153323Srodrigc } 2483153323Srodrigc kmem_free(ifp->if_broot, ifp->if_broot_bytes); 2484153323Srodrigc ifp->if_broot = new_broot; 2485153323Srodrigc ifp->if_broot_bytes = (int)new_size; 2486153323Srodrigc ASSERT(ifp->if_broot_bytes <= 2487153323Srodrigc XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 2488153323Srodrigc return; 2489153323Srodrigc} 2490153323Srodrigc 2491153323Srodrigc 2492153323Srodrigc/* 2493153323Srodrigc * This is called when the amount of space needed for if_data 2494153323Srodrigc * is increased or decreased. The change in size is indicated by 2495153323Srodrigc * the number of bytes that need to be added or deleted in the 2496153323Srodrigc * byte_diff parameter. 2497153323Srodrigc * 2498153323Srodrigc * If the amount of space needed has decreased below the size of the 2499153323Srodrigc * inline buffer, then switch to using the inline buffer. Otherwise, 2500153323Srodrigc * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer 2501153323Srodrigc * to what is needed. 2502153323Srodrigc * 2503153323Srodrigc * ip -- the inode whose if_data area is changing 2504153323Srodrigc * byte_diff -- the change in the number of bytes, positive or negative, 2505153323Srodrigc * requested for the if_data array. 2506153323Srodrigc */ 2507153323Srodrigcvoid 2508153323Srodrigcxfs_idata_realloc( 2509153323Srodrigc xfs_inode_t *ip, 2510153323Srodrigc int byte_diff, 2511153323Srodrigc int whichfork) 2512153323Srodrigc{ 2513153323Srodrigc xfs_ifork_t *ifp; 2514153323Srodrigc int new_size; 2515153323Srodrigc int real_size; 2516153323Srodrigc 2517153323Srodrigc if (byte_diff == 0) { 2518153323Srodrigc return; 2519153323Srodrigc } 2520153323Srodrigc 2521153323Srodrigc ifp = XFS_IFORK_PTR(ip, whichfork); 2522153323Srodrigc new_size = (int)ifp->if_bytes + byte_diff; 2523153323Srodrigc ASSERT(new_size >= 0); 2524153323Srodrigc 2525153323Srodrigc if (new_size == 0) { 2526153323Srodrigc if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2527153323Srodrigc kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 2528153323Srodrigc } 2529153323Srodrigc ifp->if_u1.if_data = NULL; 2530153323Srodrigc real_size = 0; 2531153323Srodrigc } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) { 2532153323Srodrigc /* 2533153323Srodrigc * If the valid extents/data can fit in if_inline_ext/data, 2534153323Srodrigc * copy them from the malloc'd vector and free it. 2535153323Srodrigc */ 2536153323Srodrigc if (ifp->if_u1.if_data == NULL) { 2537153323Srodrigc ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 2538153323Srodrigc } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2539153323Srodrigc ASSERT(ifp->if_real_bytes != 0); 2540153323Srodrigc memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, 2541153323Srodrigc new_size); 2542153323Srodrigc kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 2543153323Srodrigc ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 2544153323Srodrigc } 2545153323Srodrigc real_size = 0; 2546153323Srodrigc } else { 2547153323Srodrigc /* 2548153323Srodrigc * Stuck with malloc/realloc. 2549153323Srodrigc * For inline data, the underlying buffer must be 2550153323Srodrigc * a multiple of 4 bytes in size so that it can be 2551153323Srodrigc * logged and stay on word boundaries. We enforce 2552153323Srodrigc * that here. 2553153323Srodrigc */ 2554153323Srodrigc real_size = roundup(new_size, 4); 2555153323Srodrigc if (ifp->if_u1.if_data == NULL) { 2556153323Srodrigc ASSERT(ifp->if_real_bytes == 0); 2557153323Srodrigc ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 2558153323Srodrigc } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2559153323Srodrigc /* 2560153323Srodrigc * Only do the realloc if the underlying size 2561153323Srodrigc * is really changing. 2562153323Srodrigc */ 2563153323Srodrigc if (ifp->if_real_bytes != real_size) { 2564153323Srodrigc ifp->if_u1.if_data = 2565153323Srodrigc kmem_realloc(ifp->if_u1.if_data, 2566153323Srodrigc real_size, 2567153323Srodrigc ifp->if_real_bytes, 2568153323Srodrigc KM_SLEEP); 2569153323Srodrigc } 2570153323Srodrigc } else { 2571153323Srodrigc ASSERT(ifp->if_real_bytes == 0); 2572153323Srodrigc ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 2573153323Srodrigc memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, 2574153323Srodrigc ifp->if_bytes); 2575153323Srodrigc } 2576153323Srodrigc } 2577153323Srodrigc ifp->if_real_bytes = real_size; 2578153323Srodrigc ifp->if_bytes = new_size; 2579153323Srodrigc ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 2580153323Srodrigc} 2581153323Srodrigc 2582153323Srodrigc 2583153323Srodrigc 2584153323Srodrigc 2585153323Srodrigc/* 2586153323Srodrigc * Map inode to disk block and offset. 2587153323Srodrigc * 2588153323Srodrigc * mp -- the mount point structure for the current file system 2589153323Srodrigc * tp -- the current transaction 2590153323Srodrigc * ino -- the inode number of the inode to be located 2591153323Srodrigc * imap -- this structure is filled in with the information necessary 2592153323Srodrigc * to retrieve the given inode from disk 2593153323Srodrigc * flags -- flags to pass to xfs_dilocate indicating whether or not 2594153323Srodrigc * lookups in the inode btree were OK or not 2595153323Srodrigc */ 2596153323Srodrigcint 2597153323Srodrigcxfs_imap( 2598153323Srodrigc xfs_mount_t *mp, 2599153323Srodrigc xfs_trans_t *tp, 2600153323Srodrigc xfs_ino_t ino, 2601153323Srodrigc xfs_imap_t *imap, 2602153323Srodrigc uint flags) 2603153323Srodrigc{ 2604153323Srodrigc xfs_fsblock_t fsbno; 2605153323Srodrigc int len; 2606153323Srodrigc int off; 2607153323Srodrigc int error; 2608153323Srodrigc 2609153323Srodrigc fsbno = imap->im_blkno ? 2610153323Srodrigc XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; 2611153323Srodrigc error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); 2612153323Srodrigc if (error != 0) { 2613153323Srodrigc return error; 2614153323Srodrigc } 2615153323Srodrigc imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); 2616153323Srodrigc imap->im_len = XFS_FSB_TO_BB(mp, len); 2617153323Srodrigc imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); 2618153323Srodrigc imap->im_ioffset = (ushort)off; 2619153323Srodrigc imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); 2620153323Srodrigc return 0; 2621153323Srodrigc} 2622153323Srodrigc 2623153323Srodrigcvoid 2624153323Srodrigcxfs_idestroy_fork( 2625153323Srodrigc xfs_inode_t *ip, 2626153323Srodrigc int whichfork) 2627153323Srodrigc{ 2628153323Srodrigc xfs_ifork_t *ifp; 2629153323Srodrigc 2630153323Srodrigc ifp = XFS_IFORK_PTR(ip, whichfork); 2631153323Srodrigc if (ifp->if_broot != NULL) { 2632153323Srodrigc kmem_free(ifp->if_broot, ifp->if_broot_bytes); 2633153323Srodrigc ifp->if_broot = NULL; 2634153323Srodrigc } 2635153323Srodrigc 2636153323Srodrigc /* 2637153323Srodrigc * If the format is local, then we can't have an extents 2638153323Srodrigc * array so just look for an inline data array. If we're 2639153323Srodrigc * not local then we may or may not have an extents list, 2640153323Srodrigc * so check and free it up if we do. 2641153323Srodrigc */ 2642153323Srodrigc if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 2643153323Srodrigc if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && 2644153323Srodrigc (ifp->if_u1.if_data != NULL)) { 2645153323Srodrigc ASSERT(ifp->if_real_bytes != 0); 2646153323Srodrigc kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 2647153323Srodrigc ifp->if_u1.if_data = NULL; 2648153323Srodrigc ifp->if_real_bytes = 0; 2649153323Srodrigc } 2650153323Srodrigc } else if ((ifp->if_flags & XFS_IFEXTENTS) && 2651159451Srodrigc ((ifp->if_flags & XFS_IFEXTIREC) || 2652159451Srodrigc ((ifp->if_u1.if_extents != NULL) && 2653159451Srodrigc (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { 2654153323Srodrigc ASSERT(ifp->if_real_bytes != 0); 2655159451Srodrigc xfs_iext_destroy(ifp); 2656153323Srodrigc } 2657153323Srodrigc ASSERT(ifp->if_u1.if_extents == NULL || 2658153323Srodrigc ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); 2659153323Srodrigc ASSERT(ifp->if_real_bytes == 0); 2660153323Srodrigc if (whichfork == XFS_ATTR_FORK) { 2661153323Srodrigc kmem_zone_free(xfs_ifork_zone, ip->i_afp); 2662153323Srodrigc ip->i_afp = NULL; 2663153323Srodrigc } 2664153323Srodrigc} 2665153323Srodrigc 2666153323Srodrigc/* 2667153323Srodrigc * This is called free all the memory associated with an inode. 2668153323Srodrigc * It must free the inode itself and any buffers allocated for 2669153323Srodrigc * if_extents/if_data and if_broot. It must also free the lock 2670153323Srodrigc * associated with the inode. 2671153323Srodrigc */ 2672153323Srodrigcvoid 2673153323Srodrigcxfs_idestroy( 2674153323Srodrigc xfs_inode_t *ip) 2675153323Srodrigc{ 2676153323Srodrigc 2677153323Srodrigc switch (ip->i_d.di_mode & S_IFMT) { 2678153323Srodrigc case S_IFREG: 2679153323Srodrigc case S_IFDIR: 2680153323Srodrigc case S_IFLNK: 2681153323Srodrigc xfs_idestroy_fork(ip, XFS_DATA_FORK); 2682153323Srodrigc break; 2683153323Srodrigc } 2684153323Srodrigc if (ip->i_afp) 2685153323Srodrigc xfs_idestroy_fork(ip, XFS_ATTR_FORK); 2686153323Srodrigc mrfree(&ip->i_lock); 2687153323Srodrigc mrfree(&ip->i_iolock); 2688153323Srodrigc freesema(&ip->i_flock); 2689153323Srodrigc#ifdef XFS_BMAP_TRACE 2690153323Srodrigc ktrace_free(ip->i_xtrace); 2691153323Srodrigc#endif 2692153323Srodrigc#ifdef XFS_BMBT_TRACE 2693153323Srodrigc ktrace_free(ip->i_btrace); 2694153323Srodrigc#endif 2695153323Srodrigc#ifdef XFS_RW_TRACE 2696153323Srodrigc ktrace_free(ip->i_rwtrace); 2697153323Srodrigc#endif 2698153323Srodrigc#ifdef XFS_ILOCK_TRACE 2699153323Srodrigc ktrace_free(ip->i_lock_trace); 2700153323Srodrigc#endif 2701153323Srodrigc#ifdef XFS_DIR2_TRACE 2702153323Srodrigc ktrace_free(ip->i_dir_trace); 2703153323Srodrigc#endif 2704153323Srodrigc if (ip->i_itemp) { 2705153323Srodrigc /* XXXdpd should be able to assert this but shutdown 2706153323Srodrigc * is leaving the AIL behind. */ 2707153323Srodrigc ASSERT(((ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL) == 0) || 2708153323Srodrigc XFS_FORCED_SHUTDOWN(ip->i_mount)); 2709153323Srodrigc xfs_inode_item_destroy(ip); 2710153323Srodrigc } 2711153323Srodrigc kmem_zone_free(xfs_inode_zone, ip); 2712153323Srodrigc} 2713153323Srodrigc 2714153323Srodrigc 2715153323Srodrigc/* 2716153323Srodrigc * Increment the pin count of the given buffer. 2717153323Srodrigc * This value is protected by ipinlock spinlock in the mount structure. 2718153323Srodrigc */ 2719153323Srodrigcvoid 2720153323Srodrigcxfs_ipin( 2721153323Srodrigc xfs_inode_t *ip) 2722153323Srodrigc{ 2723153323Srodrigc ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 2724153323Srodrigc 2725153323Srodrigc atomic_inc(&ip->i_pincount); 2726153323Srodrigc} 2727153323Srodrigc 2728153323Srodrigc/* 2729153323Srodrigc * Decrement the pin count of the given inode, and wake up 2730153323Srodrigc * anyone in xfs_iwait_unpin() if the count goes to 0. The 2731159451Srodrigc * inode must have been previously pinned with a call to xfs_ipin(). 2732153323Srodrigc */ 2733153323Srodrigcvoid 2734153323Srodrigcxfs_iunpin( 2735153323Srodrigc xfs_inode_t *ip) 2736153323Srodrigc{ 2737153323Srodrigc ASSERT(atomic_read(&ip->i_pincount) > 0); 2738153323Srodrigc 2739153323Srodrigc if (atomic_dec_and_test(&ip->i_pincount)) { 2740153323Srodrigc /* 2741159451Srodrigc * If the inode is currently being reclaimed, the 2742159451Srodrigc * linux inode _and_ the xfs vnode may have been 2743159451Srodrigc * freed so we cannot reference either of them safely. 2744159451Srodrigc * Hence we should not try to do anything to them 2745159451Srodrigc * if the xfs inode is currently in the reclaim 2746159451Srodrigc * path. 2747159451Srodrigc * 2748159451Srodrigc * However, we still need to issue the unpin wakeup 2749159451Srodrigc * call as the inode reclaim may be blocked waiting for 2750159451Srodrigc * the inode to become unpinned. 2751153323Srodrigc */ 2752159451Srodrigc if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) { 2753159451Srodrigc /* 2754159451Srodrigc * Should I mark FreeBSD vnode as dirty here? 2755159451Srodrigc */ 2756159451Srodrigc printf("xfs_iunpin: REC RECABLE ip %p\n",ip); 2757159451Srodrigc#ifdef RMC 2758159451Srodrigc xfs_vnode_t *vp = XFS_ITOV_NULL(ip); 2759153323Srodrigc 2760159451Srodrigc /* make sync come back and flush this inode */ 2761159451Srodrigc if (vp) { 2762159451Srodrigc struct inode *inode = vn_to_inode(vp); 2763153323Srodrigc 2764159451Srodrigc if (!(inode->i_state & I_NEW)) 2765159451Srodrigc mark_inode_dirty_sync(inode); 2766159451Srodrigc } 2767159451Srodrigc#endif 2768153323Srodrigc } 2769153323Srodrigc wakeup(&ip->i_ipin_wait); 2770153323Srodrigc } 2771153323Srodrigc} 2772153323Srodrigc 2773153323Srodrigc/* 2774153323Srodrigc * This is called to wait for the given inode to be unpinned. 2775153323Srodrigc * It will sleep until this happens. The caller must have the 2776153323Srodrigc * inode locked in at least shared mode so that the buffer cannot 2777153323Srodrigc * be subsequently pinned once someone is waiting for it to be 2778153323Srodrigc * unpinned. 2779153323Srodrigc */ 2780153323SrodrigcSTATIC void 2781153323Srodrigcxfs_iunpin_wait( 2782153323Srodrigc xfs_inode_t *ip) 2783153323Srodrigc{ 2784153323Srodrigc xfs_inode_log_item_t *iip; 2785153323Srodrigc xfs_lsn_t lsn; 2786153323Srodrigc 2787153323Srodrigc ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); 2788153323Srodrigc 2789153323Srodrigc if (atomic_read(&ip->i_pincount) == 0) { 2790153323Srodrigc return; 2791153323Srodrigc } 2792153323Srodrigc 2793153323Srodrigc iip = ip->i_itemp; 2794153323Srodrigc if (iip && iip->ili_last_lsn) { 2795153323Srodrigc lsn = iip->ili_last_lsn; 2796153323Srodrigc } else { 2797153323Srodrigc lsn = (xfs_lsn_t)0; 2798153323Srodrigc } 2799153323Srodrigc 2800153323Srodrigc /* 2801153323Srodrigc * Give the log a push so we don't wait here too long. 2802153323Srodrigc */ 2803153323Srodrigc xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE); 2804153323Srodrigc 2805153323Srodrigc /* 2806153323Srodrigc * XXXKAN: xfs_iunpin is not locking inode 2807153323Srodrigc * at all? 2808153323Srodrigc */ 2809153323Srodrigc while(atomic_read(&ip->i_pincount) != 0) 2810153323Srodrigc tsleep(&ip->i_ipin_wait, PRIBIO, "iunpin", 0); 2811153323Srodrigc} 2812153323Srodrigc 2813153323Srodrigc 2814153323Srodrigc/* 2815153323Srodrigc * xfs_iextents_copy() 2816153323Srodrigc * 2817153323Srodrigc * This is called to copy the REAL extents (as opposed to the delayed 2818153323Srodrigc * allocation extents) from the inode into the given buffer. It 2819153323Srodrigc * returns the number of bytes copied into the buffer. 2820153323Srodrigc * 2821153323Srodrigc * If there are no delayed allocation extents, then we can just 2822153323Srodrigc * memcpy() the extents into the buffer. Otherwise, we need to 2823153323Srodrigc * examine each extent in turn and skip those which are delayed. 2824153323Srodrigc */ 2825153323Srodrigcint 2826153323Srodrigcxfs_iextents_copy( 2827153323Srodrigc xfs_inode_t *ip, 2828153323Srodrigc xfs_bmbt_rec_t *buffer, 2829153323Srodrigc int whichfork) 2830153323Srodrigc{ 2831153323Srodrigc int copied; 2832153323Srodrigc xfs_bmbt_rec_t *dest_ep; 2833153323Srodrigc xfs_bmbt_rec_t *ep; 2834153323Srodrigc#ifdef XFS_BMAP_TRACE 2835153323Srodrigc static char fname[] = "xfs_iextents_copy"; 2836153323Srodrigc#endif 2837153323Srodrigc int i; 2838153323Srodrigc xfs_ifork_t *ifp; 2839153323Srodrigc int nrecs; 2840153323Srodrigc xfs_fsblock_t start_block; 2841153323Srodrigc 2842153323Srodrigc ifp = XFS_IFORK_PTR(ip, whichfork); 2843153323Srodrigc ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 2844153323Srodrigc ASSERT(ifp->if_bytes > 0); 2845153323Srodrigc 2846153323Srodrigc nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 2847153323Srodrigc xfs_bmap_trace_exlist(fname, ip, nrecs, whichfork); 2848153323Srodrigc ASSERT(nrecs > 0); 2849153323Srodrigc 2850153323Srodrigc /* 2851153323Srodrigc * There are some delayed allocation extents in the 2852153323Srodrigc * inode, so copy the extents one at a time and skip 2853153323Srodrigc * the delayed ones. There must be at least one 2854153323Srodrigc * non-delayed extent. 2855153323Srodrigc */ 2856153323Srodrigc dest_ep = buffer; 2857153323Srodrigc copied = 0; 2858153323Srodrigc for (i = 0; i < nrecs; i++) { 2859159451Srodrigc ep = xfs_iext_get_ext(ifp, i); 2860153323Srodrigc start_block = xfs_bmbt_get_startblock(ep); 2861153323Srodrigc if (ISNULLSTARTBLOCK(start_block)) { 2862153323Srodrigc /* 2863153323Srodrigc * It's a delayed allocation extent, so skip it. 2864153323Srodrigc */ 2865153323Srodrigc continue; 2866153323Srodrigc } 2867153323Srodrigc 2868153323Srodrigc /* Translate to on disk format */ 2869153323Srodrigc put_unaligned(INT_GET(ep->l0, ARCH_CONVERT), 2870153323Srodrigc (__uint64_t*)&dest_ep->l0); 2871153323Srodrigc put_unaligned(INT_GET(ep->l1, ARCH_CONVERT), 2872153323Srodrigc (__uint64_t*)&dest_ep->l1); 2873153323Srodrigc dest_ep++; 2874153323Srodrigc copied++; 2875153323Srodrigc } 2876153323Srodrigc ASSERT(copied != 0); 2877159451Srodrigc xfs_validate_extents(ifp, copied, 1, XFS_EXTFMT_INODE(ip)); 2878153323Srodrigc 2879153323Srodrigc return (copied * (uint)sizeof(xfs_bmbt_rec_t)); 2880153323Srodrigc} 2881153323Srodrigc 2882153323Srodrigc/* 2883153323Srodrigc * Each of the following cases stores data into the same region 2884153323Srodrigc * of the on-disk inode, so only one of them can be valid at 2885153323Srodrigc * any given time. While it is possible to have conflicting formats 2886153323Srodrigc * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is 2887153323Srodrigc * in EXTENTS format, this can only happen when the fork has 2888153323Srodrigc * changed formats after being modified but before being flushed. 2889153323Srodrigc * In these cases, the format always takes precedence, because the 2890153323Srodrigc * format indicates the current state of the fork. 2891153323Srodrigc */ 2892153323Srodrigc/*ARGSUSED*/ 2893153323SrodrigcSTATIC int 2894153323Srodrigcxfs_iflush_fork( 2895153323Srodrigc xfs_inode_t *ip, 2896153323Srodrigc xfs_dinode_t *dip, 2897153323Srodrigc xfs_inode_log_item_t *iip, 2898153323Srodrigc int whichfork, 2899153323Srodrigc xfs_buf_t *bp) 2900153323Srodrigc{ 2901153323Srodrigc char *cp; 2902153323Srodrigc xfs_ifork_t *ifp; 2903153323Srodrigc xfs_mount_t *mp; 2904153323Srodrigc#ifdef XFS_TRANS_DEBUG 2905153323Srodrigc int first; 2906153323Srodrigc#endif 2907153323Srodrigc static const short brootflag[2] = 2908153323Srodrigc { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; 2909153323Srodrigc static const short dataflag[2] = 2910153323Srodrigc { XFS_ILOG_DDATA, XFS_ILOG_ADATA }; 2911153323Srodrigc static const short extflag[2] = 2912153323Srodrigc { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; 2913153323Srodrigc 2914153323Srodrigc if (iip == NULL) 2915153323Srodrigc return 0; 2916153323Srodrigc ifp = XFS_IFORK_PTR(ip, whichfork); 2917153323Srodrigc /* 2918153323Srodrigc * This can happen if we gave up in iformat in an error path, 2919153323Srodrigc * for the attribute fork. 2920153323Srodrigc */ 2921153323Srodrigc if (ifp == NULL) { 2922153323Srodrigc ASSERT(whichfork == XFS_ATTR_FORK); 2923153323Srodrigc return 0; 2924153323Srodrigc } 2925159451Srodrigc cp = XFS_DFORK_PTR(dip, whichfork); 2926153323Srodrigc mp = ip->i_mount; 2927153323Srodrigc switch (XFS_IFORK_FORMAT(ip, whichfork)) { 2928153323Srodrigc case XFS_DINODE_FMT_LOCAL: 2929153323Srodrigc if ((iip->ili_format.ilf_fields & dataflag[whichfork]) && 2930153323Srodrigc (ifp->if_bytes > 0)) { 2931153323Srodrigc ASSERT(ifp->if_u1.if_data != NULL); 2932153323Srodrigc ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 2933153323Srodrigc memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); 2934153323Srodrigc } 2935153323Srodrigc if (whichfork == XFS_DATA_FORK) { 2936153323Srodrigc if (unlikely(XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp, dip))) { 2937153323Srodrigc XFS_ERROR_REPORT("xfs_iflush_fork", 2938153323Srodrigc XFS_ERRLEVEL_LOW, mp); 2939153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 2940153323Srodrigc } 2941153323Srodrigc } 2942153323Srodrigc break; 2943153323Srodrigc 2944153323Srodrigc case XFS_DINODE_FMT_EXTENTS: 2945153323Srodrigc ASSERT((ifp->if_flags & XFS_IFEXTENTS) || 2946153323Srodrigc !(iip->ili_format.ilf_fields & extflag[whichfork])); 2947159451Srodrigc ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) || 2948159451Srodrigc (ifp->if_bytes == 0)); 2949159451Srodrigc ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) || 2950159451Srodrigc (ifp->if_bytes > 0)); 2951153323Srodrigc if ((iip->ili_format.ilf_fields & extflag[whichfork]) && 2952153323Srodrigc (ifp->if_bytes > 0)) { 2953153323Srodrigc ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 2954153323Srodrigc (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, 2955153323Srodrigc whichfork); 2956153323Srodrigc } 2957153323Srodrigc break; 2958153323Srodrigc 2959153323Srodrigc case XFS_DINODE_FMT_BTREE: 2960153323Srodrigc if ((iip->ili_format.ilf_fields & brootflag[whichfork]) && 2961153323Srodrigc (ifp->if_broot_bytes > 0)) { 2962153323Srodrigc ASSERT(ifp->if_broot != NULL); 2963153323Srodrigc ASSERT(ifp->if_broot_bytes <= 2964153323Srodrigc (XFS_IFORK_SIZE(ip, whichfork) + 2965153323Srodrigc XFS_BROOT_SIZE_ADJ)); 2966153323Srodrigc xfs_bmbt_to_bmdr(ifp->if_broot, ifp->if_broot_bytes, 2967153323Srodrigc (xfs_bmdr_block_t *)cp, 2968159451Srodrigc XFS_DFORK_SIZE(dip, mp, whichfork)); 2969153323Srodrigc } 2970153323Srodrigc break; 2971153323Srodrigc 2972153323Srodrigc case XFS_DINODE_FMT_DEV: 2973153323Srodrigc if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 2974153323Srodrigc ASSERT(whichfork == XFS_DATA_FORK); 2975153323Srodrigc INT_SET(dip->di_u.di_dev, ARCH_CONVERT, ip->i_df.if_u2.if_rdev); 2976153323Srodrigc } 2977153323Srodrigc break; 2978153323Srodrigc 2979153323Srodrigc case XFS_DINODE_FMT_UUID: 2980153323Srodrigc if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 2981153323Srodrigc ASSERT(whichfork == XFS_DATA_FORK); 2982153323Srodrigc memcpy(&dip->di_u.di_muuid, &ip->i_df.if_u2.if_uuid, 2983153323Srodrigc sizeof(uuid_t)); 2984153323Srodrigc } 2985153323Srodrigc break; 2986153323Srodrigc 2987153323Srodrigc default: 2988153323Srodrigc ASSERT(0); 2989153323Srodrigc break; 2990153323Srodrigc } 2991153323Srodrigc 2992153323Srodrigc return 0; 2993153323Srodrigc} 2994153323Srodrigc 2995153323Srodrigc/* 2996153323Srodrigc * xfs_iflush() will write a modified inode's changes out to the 2997153323Srodrigc * inode's on disk home. The caller must have the inode lock held 2998153323Srodrigc * in at least shared mode and the inode flush semaphore must be 2999153323Srodrigc * held as well. The inode lock will still be held upon return from 3000153323Srodrigc * the call and the caller is free to unlock it. 3001153323Srodrigc * The inode flush lock will be unlocked when the inode reaches the disk. 3002153323Srodrigc * The flags indicate how the inode's buffer should be written out. 3003153323Srodrigc */ 3004153323Srodrigcint 3005153323Srodrigcxfs_iflush( 3006153323Srodrigc xfs_inode_t *ip, 3007153323Srodrigc uint flags) 3008153323Srodrigc{ 3009153323Srodrigc xfs_inode_log_item_t *iip; 3010153323Srodrigc xfs_buf_t *bp; 3011153323Srodrigc xfs_dinode_t *dip; 3012153323Srodrigc xfs_mount_t *mp; 3013153323Srodrigc int error; 3014153323Srodrigc /* REFERENCED */ 3015153323Srodrigc xfs_chash_t *ch; 3016153323Srodrigc xfs_inode_t *iq; 3017153323Srodrigc int clcount; /* count of inodes clustered */ 3018153323Srodrigc int bufwasdelwri; 3019153323Srodrigc enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; 3020153323Srodrigc SPLDECL(s); 3021153323Srodrigc 3022153323Srodrigc XFS_STATS_INC(xs_iflush_count); 3023153323Srodrigc 3024159451Srodrigc 3025159451Srodrigc printf("xfs_iflush: ip %p i_ino %lld\n",ip,ip->i_ino); 3026153323Srodrigc ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 3027153323Srodrigc ASSERT(valusema(&ip->i_flock) <= 0); 3028153323Srodrigc ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3029153323Srodrigc ip->i_d.di_nextents > ip->i_df.if_ext_max); 3030153323Srodrigc 3031153323Srodrigc iip = ip->i_itemp; 3032153323Srodrigc mp = ip->i_mount; 3033153323Srodrigc 3034153323Srodrigc /* 3035153323Srodrigc * If the inode isn't dirty, then just release the inode 3036153323Srodrigc * flush lock and do nothing. 3037153323Srodrigc */ 3038153323Srodrigc if ((ip->i_update_core == 0) && 3039153323Srodrigc ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { 3040153323Srodrigc ASSERT((iip != NULL) ? 3041153323Srodrigc !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1); 3042153323Srodrigc xfs_ifunlock(ip); 3043153323Srodrigc return 0; 3044153323Srodrigc } 3045153323Srodrigc 3046153323Srodrigc /* 3047153323Srodrigc * We can't flush the inode until it is unpinned, so 3048153323Srodrigc * wait for it. We know noone new can pin it, because 3049153323Srodrigc * we are holding the inode lock shared and you need 3050153323Srodrigc * to hold it exclusively to pin the inode. 3051153323Srodrigc */ 3052153323Srodrigc xfs_iunpin_wait(ip); 3053153323Srodrigc 3054153323Srodrigc /* 3055153323Srodrigc * This may have been unpinned because the filesystem is shutting 3056153323Srodrigc * down forcibly. If that's the case we must not write this inode 3057153323Srodrigc * to disk, because the log record didn't make it to disk! 3058153323Srodrigc */ 3059153323Srodrigc if (XFS_FORCED_SHUTDOWN(mp)) { 3060153323Srodrigc ip->i_update_core = 0; 3061153323Srodrigc if (iip) 3062153323Srodrigc iip->ili_format.ilf_fields = 0; 3063153323Srodrigc xfs_ifunlock(ip); 3064153323Srodrigc return XFS_ERROR(EIO); 3065153323Srodrigc } 3066153323Srodrigc 3067153323Srodrigc /* 3068153323Srodrigc * Get the buffer containing the on-disk inode. 3069153323Srodrigc */ 3070159451Srodrigc error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0); 3071159451Srodrigc if (error) { 3072153323Srodrigc xfs_ifunlock(ip); 3073153323Srodrigc return error; 3074153323Srodrigc } 3075153323Srodrigc 3076153323Srodrigc /* 3077153323Srodrigc * Decide how buffer will be flushed out. This is done before 3078153323Srodrigc * the call to xfs_iflush_int because this field is zeroed by it. 3079153323Srodrigc */ 3080153323Srodrigc if (iip != NULL && iip->ili_format.ilf_fields != 0) { 3081153323Srodrigc /* 3082153323Srodrigc * Flush out the inode buffer according to the directions 3083153323Srodrigc * of the caller. In the cases where the caller has given 3084153323Srodrigc * us a choice choose the non-delwri case. This is because 3085153323Srodrigc * the inode is in the AIL and we need to get it out soon. 3086153323Srodrigc */ 3087153323Srodrigc switch (flags) { 3088153323Srodrigc case XFS_IFLUSH_SYNC: 3089153323Srodrigc case XFS_IFLUSH_DELWRI_ELSE_SYNC: 3090153323Srodrigc flags = 0; 3091153323Srodrigc break; 3092153323Srodrigc case XFS_IFLUSH_ASYNC: 3093153323Srodrigc case XFS_IFLUSH_DELWRI_ELSE_ASYNC: 3094153323Srodrigc flags = INT_ASYNC; 3095153323Srodrigc break; 3096153323Srodrigc case XFS_IFLUSH_DELWRI: 3097153323Srodrigc flags = INT_DELWRI; 3098153323Srodrigc break; 3099153323Srodrigc default: 3100153323Srodrigc ASSERT(0); 3101153323Srodrigc flags = 0; 3102153323Srodrigc break; 3103153323Srodrigc } 3104153323Srodrigc } else { 3105153323Srodrigc switch (flags) { 3106153323Srodrigc case XFS_IFLUSH_DELWRI_ELSE_SYNC: 3107153323Srodrigc case XFS_IFLUSH_DELWRI_ELSE_ASYNC: 3108153323Srodrigc case XFS_IFLUSH_DELWRI: 3109153323Srodrigc flags = INT_DELWRI; 3110153323Srodrigc break; 3111153323Srodrigc case XFS_IFLUSH_ASYNC: 3112153323Srodrigc flags = INT_ASYNC; 3113153323Srodrigc break; 3114153323Srodrigc case XFS_IFLUSH_SYNC: 3115153323Srodrigc flags = 0; 3116153323Srodrigc break; 3117153323Srodrigc default: 3118153323Srodrigc ASSERT(0); 3119153323Srodrigc flags = 0; 3120153323Srodrigc break; 3121153323Srodrigc } 3122153323Srodrigc } 3123153323Srodrigc 3124153323Srodrigc /* 3125153323Srodrigc * First flush out the inode that xfs_iflush was called with. 3126153323Srodrigc */ 3127153323Srodrigc error = xfs_iflush_int(ip, bp); 3128153323Srodrigc if (error) { 3129153323Srodrigc goto corrupt_out; 3130153323Srodrigc } 3131153323Srodrigc 3132153323Srodrigc /* 3133153323Srodrigc * inode clustering: 3134153323Srodrigc * see if other inodes can be gathered into this write 3135153323Srodrigc */ 3136153323Srodrigc 3137153323Srodrigc ip->i_chash->chl_buf = bp; 3138153323Srodrigc 3139153323Srodrigc ch = XFS_CHASH(mp, ip->i_blkno); 3140153323Srodrigc s = mutex_spinlock(&ch->ch_lock); 3141153323Srodrigc 3142153323Srodrigc clcount = 0; 3143153323Srodrigc for (iq = ip->i_cnext; iq != ip; iq = iq->i_cnext) { 3144153323Srodrigc /* 3145153323Srodrigc * Do an un-protected check to see if the inode is dirty and 3146153323Srodrigc * is a candidate for flushing. These checks will be repeated 3147153323Srodrigc * later after the appropriate locks are acquired. 3148153323Srodrigc */ 3149153323Srodrigc iip = iq->i_itemp; 3150153323Srodrigc if ((iq->i_update_core == 0) && 3151153323Srodrigc ((iip == NULL) || 3152153323Srodrigc !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) && 3153153323Srodrigc xfs_ipincount(iq) == 0) { 3154153323Srodrigc continue; 3155153323Srodrigc } 3156153323Srodrigc 3157153323Srodrigc /* 3158153323Srodrigc * Try to get locks. If any are unavailable, 3159153323Srodrigc * then this inode cannot be flushed and is skipped. 3160153323Srodrigc */ 3161153323Srodrigc 3162153323Srodrigc /* get inode locks (just i_lock) */ 3163153323Srodrigc if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) { 3164153323Srodrigc /* get inode flush lock */ 3165153323Srodrigc if (xfs_iflock_nowait(iq)) { 3166153323Srodrigc /* check if pinned */ 3167153323Srodrigc if (xfs_ipincount(iq) == 0) { 3168153323Srodrigc /* arriving here means that 3169153323Srodrigc * this inode can be flushed. 3170153323Srodrigc * first re-check that it's 3171153323Srodrigc * dirty 3172153323Srodrigc */ 3173153323Srodrigc iip = iq->i_itemp; 3174153323Srodrigc if ((iq->i_update_core != 0)|| 3175153323Srodrigc ((iip != NULL) && 3176153323Srodrigc (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { 3177153323Srodrigc clcount++; 3178153323Srodrigc error = xfs_iflush_int(iq, bp); 3179153323Srodrigc if (error) { 3180153323Srodrigc xfs_iunlock(iq, 3181153323Srodrigc XFS_ILOCK_SHARED); 3182153323Srodrigc goto cluster_corrupt_out; 3183153323Srodrigc } 3184153323Srodrigc } else { 3185153323Srodrigc xfs_ifunlock(iq); 3186153323Srodrigc } 3187153323Srodrigc } else { 3188153323Srodrigc xfs_ifunlock(iq); 3189153323Srodrigc } 3190153323Srodrigc } 3191153323Srodrigc xfs_iunlock(iq, XFS_ILOCK_SHARED); 3192153323Srodrigc } 3193153323Srodrigc } 3194153323Srodrigc mutex_spinunlock(&ch->ch_lock, s); 3195153323Srodrigc 3196153323Srodrigc if (clcount) { 3197153323Srodrigc XFS_STATS_INC(xs_icluster_flushcnt); 3198153323Srodrigc XFS_STATS_ADD(xs_icluster_flushinode, clcount); 3199153323Srodrigc } 3200153323Srodrigc 3201153323Srodrigc /* 3202153323Srodrigc * If the buffer is pinned then push on the log so we won't 3203153323Srodrigc * get stuck waiting in the write for too long. 3204153323Srodrigc */ 3205153323Srodrigc if (XFS_BUF_ISPINNED(bp)){ 3206153323Srodrigc xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 3207153323Srodrigc } 3208153323Srodrigc 3209153323Srodrigc if (flags & INT_DELWRI) { 3210153323Srodrigc xfs_bdwrite(mp, bp); 3211153323Srodrigc } else if (flags & INT_ASYNC) { 3212153323Srodrigc xfs_bawrite(mp, bp); 3213153323Srodrigc } else { 3214153323Srodrigc error = xfs_bwrite(mp, bp); 3215153323Srodrigc } 3216153323Srodrigc return error; 3217153323Srodrigc 3218153323Srodrigccorrupt_out: 3219153323Srodrigc xfs_buf_relse(bp); 3220153323Srodrigc xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); 3221153323Srodrigc xfs_iflush_abort(ip); 3222153323Srodrigc /* 3223153323Srodrigc * Unlocks the flush lock 3224153323Srodrigc */ 3225153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 3226153323Srodrigc 3227153323Srodrigccluster_corrupt_out: 3228153323Srodrigc /* Corruption detected in the clustering loop. Invalidate the 3229153323Srodrigc * inode buffer and shut down the filesystem. 3230153323Srodrigc */ 3231153323Srodrigc mutex_spinunlock(&ch->ch_lock, s); 3232153323Srodrigc 3233153323Srodrigc /* 3234153323Srodrigc * Clean up the buffer. If it was B_DELWRI, just release it -- 3235153323Srodrigc * brelse can handle it with no problems. If not, shut down the 3236153323Srodrigc * filesystem before releasing the buffer. 3237153323Srodrigc */ 3238153323Srodrigc if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) { 3239153323Srodrigc xfs_buf_relse(bp); 3240153323Srodrigc } 3241153323Srodrigc 3242153323Srodrigc xfs_force_shutdown(mp, XFS_CORRUPT_INCORE); 3243153323Srodrigc 3244153323Srodrigc if(!bufwasdelwri) { 3245153323Srodrigc /* 3246153323Srodrigc * Just like incore_relse: if we have b_iodone functions, 3247153323Srodrigc * mark the buffer as an error and call them. Otherwise 3248153323Srodrigc * mark it as stale and brelse. 3249153323Srodrigc */ 3250153323Srodrigc if (XFS_BUF_IODONE_FUNC(bp)) { 3251153323Srodrigc XFS_BUF_CLR_BDSTRAT_FUNC(bp); 3252153323Srodrigc XFS_BUF_UNDONE(bp); 3253153323Srodrigc XFS_BUF_STALE(bp); 3254153323Srodrigc XFS_BUF_SHUT(bp); 3255153323Srodrigc XFS_BUF_ERROR(bp,EIO); 3256153323Srodrigc xfs_biodone(bp); 3257153323Srodrigc } else { 3258153323Srodrigc XFS_BUF_STALE(bp); 3259153323Srodrigc xfs_buf_relse(bp); 3260153323Srodrigc } 3261153323Srodrigc } 3262153323Srodrigc 3263153323Srodrigc xfs_iflush_abort(iq); 3264153323Srodrigc /* 3265153323Srodrigc * Unlocks the flush lock 3266153323Srodrigc */ 3267153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 3268153323Srodrigc} 3269153323Srodrigc 3270153323Srodrigc 3271153323SrodrigcSTATIC int 3272153323Srodrigcxfs_iflush_int( 3273153323Srodrigc xfs_inode_t *ip, 3274153323Srodrigc xfs_buf_t *bp) 3275153323Srodrigc{ 3276153323Srodrigc xfs_inode_log_item_t *iip; 3277153323Srodrigc xfs_dinode_t *dip; 3278153323Srodrigc xfs_mount_t *mp; 3279153323Srodrigc#ifdef XFS_TRANS_DEBUG 3280159451Srodrigc // int first; 3281153323Srodrigc#endif 3282153323Srodrigc SPLDECL(s); 3283153323Srodrigc 3284153323Srodrigc ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 3285153323Srodrigc ASSERT(valusema(&ip->i_flock) <= 0); 3286153323Srodrigc ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3287153323Srodrigc ip->i_d.di_nextents > ip->i_df.if_ext_max); 3288153323Srodrigc 3289153323Srodrigc iip = ip->i_itemp; 3290153323Srodrigc mp = ip->i_mount; 3291153323Srodrigc 3292153323Srodrigc 3293153323Srodrigc /* 3294153323Srodrigc * If the inode isn't dirty, then just release the inode 3295153323Srodrigc * flush lock and do nothing. 3296153323Srodrigc */ 3297153323Srodrigc if ((ip->i_update_core == 0) && 3298153323Srodrigc ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { 3299153323Srodrigc xfs_ifunlock(ip); 3300153323Srodrigc return 0; 3301153323Srodrigc } 3302153323Srodrigc 3303153323Srodrigc /* set *dip = inode's place in the buffer */ 3304153323Srodrigc dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset); 3305153323Srodrigc 3306153323Srodrigc /* 3307153323Srodrigc * Clear i_update_core before copying out the data. 3308153323Srodrigc * This is for coordination with our timestamp updates 3309153323Srodrigc * that don't hold the inode lock. They will always 3310153323Srodrigc * update the timestamps BEFORE setting i_update_core, 3311153323Srodrigc * so if we clear i_update_core after they set it we 3312153323Srodrigc * are guaranteed to see their updates to the timestamps. 3313153323Srodrigc * I believe that this depends on strongly ordered memory 3314153323Srodrigc * semantics, but we have that. We use the SYNCHRONIZE 3315153323Srodrigc * macro to make sure that the compiler does not reorder 3316153323Srodrigc * the i_update_core access below the data copy below. 3317153323Srodrigc */ 3318153323Srodrigc ip->i_update_core = 0; 3319153323Srodrigc SYNCHRONIZE(); 3320153323Srodrigc 3321159451Srodrigc /* 3322159451Srodrigc * Make sure to get the latest atime from the Linux inode. 3323159451Srodrigc */ 3324159451Srodrigc xfs_synchronize_atime(ip); 3325159451Srodrigc 3326153323Srodrigc if (XFS_TEST_ERROR(INT_GET(dip->di_core.di_magic,ARCH_CONVERT) != XFS_DINODE_MAGIC, 3327153323Srodrigc mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 3328153323Srodrigc xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3329153323Srodrigc "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p", 3330153323Srodrigc ip->i_ino, (int) INT_GET(dip->di_core.di_magic, ARCH_CONVERT), dip); 3331153323Srodrigc goto corrupt_out; 3332153323Srodrigc } 3333153323Srodrigc if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, 3334153323Srodrigc mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { 3335153323Srodrigc xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3336153323Srodrigc "xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x", 3337153323Srodrigc ip->i_ino, ip, ip->i_d.di_magic); 3338153323Srodrigc goto corrupt_out; 3339153323Srodrigc } 3340153323Srodrigc if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 3341153323Srodrigc if (XFS_TEST_ERROR( 3342153323Srodrigc (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 3343153323Srodrigc (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 3344153323Srodrigc mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { 3345153323Srodrigc xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3346153323Srodrigc "xfs_iflush: Bad regular inode %Lu, ptr 0x%p", 3347153323Srodrigc ip->i_ino, ip); 3348153323Srodrigc goto corrupt_out; 3349153323Srodrigc } 3350153323Srodrigc } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 3351153323Srodrigc if (XFS_TEST_ERROR( 3352153323Srodrigc (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 3353153323Srodrigc (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && 3354153323Srodrigc (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 3355153323Srodrigc mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { 3356153323Srodrigc xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3357153323Srodrigc "xfs_iflush: Bad directory inode %Lu, ptr 0x%p", 3358153323Srodrigc ip->i_ino, ip); 3359153323Srodrigc goto corrupt_out; 3360153323Srodrigc } 3361153323Srodrigc } 3362153323Srodrigc if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > 3363153323Srodrigc ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, 3364153323Srodrigc XFS_RANDOM_IFLUSH_5)) { 3365153323Srodrigc xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3366153323Srodrigc "xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p", 3367153323Srodrigc ip->i_ino, 3368153323Srodrigc ip->i_d.di_nextents + ip->i_d.di_anextents, 3369153323Srodrigc ip->i_d.di_nblocks, 3370153323Srodrigc ip); 3371153323Srodrigc goto corrupt_out; 3372153323Srodrigc } 3373153323Srodrigc if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 3374153323Srodrigc mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { 3375153323Srodrigc xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 3376153323Srodrigc "xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p", 3377153323Srodrigc ip->i_ino, ip->i_d.di_forkoff, ip); 3378153323Srodrigc goto corrupt_out; 3379153323Srodrigc } 3380153323Srodrigc /* 3381153323Srodrigc * bump the flush iteration count, used to detect flushes which 3382153323Srodrigc * postdate a log record during recovery. 3383153323Srodrigc */ 3384153323Srodrigc 3385153323Srodrigc ip->i_d.di_flushiter++; 3386153323Srodrigc 3387153323Srodrigc /* 3388153323Srodrigc * Copy the dirty parts of the inode into the on-disk 3389153323Srodrigc * inode. We always copy out the core of the inode, 3390153323Srodrigc * because if the inode is dirty at all the core must 3391153323Srodrigc * be. 3392153323Srodrigc */ 3393159451Srodrigc xfs_xlate_dinode_core((xfs_caddr_t)&(dip->di_core), &(ip->i_d), -1); 3394153323Srodrigc 3395153323Srodrigc /* Wrap, we never let the log put out DI_MAX_FLUSH */ 3396153323Srodrigc if (ip->i_d.di_flushiter == DI_MAX_FLUSH) 3397153323Srodrigc ip->i_d.di_flushiter = 0; 3398153323Srodrigc 3399153323Srodrigc /* 3400153323Srodrigc * If this is really an old format inode and the superblock version 3401153323Srodrigc * has not been updated to support only new format inodes, then 3402153323Srodrigc * convert back to the old inode format. If the superblock version 3403153323Srodrigc * has been updated, then make the conversion permanent. 3404153323Srodrigc */ 3405153323Srodrigc ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || 3406153323Srodrigc XFS_SB_VERSION_HASNLINK(&mp->m_sb)); 3407153323Srodrigc if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { 3408153323Srodrigc if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { 3409153323Srodrigc /* 3410153323Srodrigc * Convert it back. 3411153323Srodrigc */ 3412153323Srodrigc ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); 3413153323Srodrigc INT_SET(dip->di_core.di_onlink, ARCH_CONVERT, ip->i_d.di_nlink); 3414153323Srodrigc } else { 3415153323Srodrigc /* 3416153323Srodrigc * The superblock version has already been bumped, 3417153323Srodrigc * so just make the conversion to the new inode 3418153323Srodrigc * format permanent. 3419153323Srodrigc */ 3420153323Srodrigc ip->i_d.di_version = XFS_DINODE_VERSION_2; 3421153323Srodrigc INT_SET(dip->di_core.di_version, ARCH_CONVERT, XFS_DINODE_VERSION_2); 3422153323Srodrigc ip->i_d.di_onlink = 0; 3423159451Srodrigc dip->di_core.di_onlink = 0; 3424153323Srodrigc memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 3425153323Srodrigc memset(&(dip->di_core.di_pad[0]), 0, 3426153323Srodrigc sizeof(dip->di_core.di_pad)); 3427153323Srodrigc ASSERT(ip->i_d.di_projid == 0); 3428153323Srodrigc } 3429153323Srodrigc } 3430153323Srodrigc 3431153323Srodrigc if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED) { 3432153323Srodrigc goto corrupt_out; 3433153323Srodrigc } 3434153323Srodrigc 3435153323Srodrigc if (XFS_IFORK_Q(ip)) { 3436153323Srodrigc /* 3437153323Srodrigc * The only error from xfs_iflush_fork is on the data fork. 3438153323Srodrigc */ 3439153323Srodrigc (void) xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); 3440153323Srodrigc } 3441153323Srodrigc xfs_inobp_check(mp, bp); 3442153323Srodrigc 3443153323Srodrigc /* 3444153323Srodrigc * We've recorded everything logged in the inode, so we'd 3445153323Srodrigc * like to clear the ilf_fields bits so we don't log and 3446153323Srodrigc * flush things unnecessarily. However, we can't stop 3447153323Srodrigc * logging all this information until the data we've copied 3448153323Srodrigc * into the disk buffer is written to disk. If we did we might 3449153323Srodrigc * overwrite the copy of the inode in the log with all the 3450153323Srodrigc * data after re-logging only part of it, and in the face of 3451153323Srodrigc * a crash we wouldn't have all the data we need to recover. 3452153323Srodrigc * 3453153323Srodrigc * What we do is move the bits to the ili_last_fields field. 3454153323Srodrigc * When logging the inode, these bits are moved back to the 3455153323Srodrigc * ilf_fields field. In the xfs_iflush_done() routine we 3456153323Srodrigc * clear ili_last_fields, since we know that the information 3457153323Srodrigc * those bits represent is permanently on disk. As long as 3458153323Srodrigc * the flush completes before the inode is logged again, then 3459153323Srodrigc * both ilf_fields and ili_last_fields will be cleared. 3460153323Srodrigc * 3461153323Srodrigc * We can play with the ilf_fields bits here, because the inode 3462153323Srodrigc * lock must be held exclusively in order to set bits there 3463153323Srodrigc * and the flush lock protects the ili_last_fields bits. 3464153323Srodrigc * Set ili_logged so the flush done 3465153323Srodrigc * routine can tell whether or not to look in the AIL. 3466153323Srodrigc * Also, store the current LSN of the inode so that we can tell 3467153323Srodrigc * whether the item has moved in the AIL from xfs_iflush_done(). 3468153323Srodrigc * In order to read the lsn we need the AIL lock, because 3469153323Srodrigc * it is a 64 bit value that cannot be read atomically. 3470153323Srodrigc */ 3471153323Srodrigc if (iip != NULL && iip->ili_format.ilf_fields != 0) { 3472153323Srodrigc iip->ili_last_fields = iip->ili_format.ilf_fields; 3473153323Srodrigc iip->ili_format.ilf_fields = 0; 3474153323Srodrigc iip->ili_logged = 1; 3475153323Srodrigc 3476153323Srodrigc ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */ 3477153323Srodrigc AIL_LOCK(mp,s); 3478153323Srodrigc iip->ili_flush_lsn = iip->ili_item.li_lsn; 3479153323Srodrigc AIL_UNLOCK(mp, s); 3480153323Srodrigc 3481153323Srodrigc /* 3482153323Srodrigc * Attach the function xfs_iflush_done to the inode's 3483153323Srodrigc * buffer. This will remove the inode from the AIL 3484153323Srodrigc * and unlock the inode's flush lock when the inode is 3485153323Srodrigc * completely written to disk. 3486153323Srodrigc */ 3487153323Srodrigc xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) 3488153323Srodrigc xfs_iflush_done, (xfs_log_item_t *)iip); 3489153323Srodrigc 3490153323Srodrigc ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 3491153323Srodrigc ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); 3492153323Srodrigc } else { 3493153323Srodrigc /* 3494153323Srodrigc * We're flushing an inode which is not in the AIL and has 3495153323Srodrigc * not been logged but has i_update_core set. For this 3496153323Srodrigc * case we can use a B_DELWRI flush and immediately drop 3497153323Srodrigc * the inode flush lock because we can avoid the whole 3498153323Srodrigc * AIL state thing. It's OK to drop the flush lock now, 3499153323Srodrigc * because we've already locked the buffer and to do anything 3500153323Srodrigc * you really need both. 3501153323Srodrigc */ 3502153323Srodrigc if (iip != NULL) { 3503153323Srodrigc ASSERT(iip->ili_logged == 0); 3504153323Srodrigc ASSERT(iip->ili_last_fields == 0); 3505153323Srodrigc ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0); 3506153323Srodrigc } 3507153323Srodrigc xfs_ifunlock(ip); 3508153323Srodrigc } 3509153323Srodrigc 3510153323Srodrigc return 0; 3511153323Srodrigc 3512153323Srodrigccorrupt_out: 3513153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 3514153323Srodrigc} 3515153323Srodrigc 3516159451Srodrigc 3517153323Srodrigc/* 3518159451Srodrigc * Flush all inactive inodes in mp. 3519153323Srodrigc */ 3520159451Srodrigcvoid 3521153323Srodrigcxfs_iflush_all( 3522159451Srodrigc xfs_mount_t *mp) 3523153323Srodrigc{ 3524153323Srodrigc int done; 3525153323Srodrigc int purged; 3526153323Srodrigc xfs_inode_t *ip; 3527153323Srodrigc xfs_vnode_t *vp; 3528153323Srodrigc 3529159451Srodrigc done = 0; 3530153323Srodrigc while (!done) { 3531153323Srodrigc purged = 0; 3532153323Srodrigc XFS_MOUNT_ILOCK(mp); 3533153323Srodrigc ip = mp->m_inodes; 3534153323Srodrigc if (ip == NULL) { 3535153323Srodrigc break; 3536153323Srodrigc } 3537153323Srodrigc do { 3538153323Srodrigc /* Make sure we skip markers inserted by sync */ 3539153323Srodrigc if (ip->i_mount == NULL) { 3540153323Srodrigc ip = ip->i_mnext; 3541153323Srodrigc continue; 3542153323Srodrigc } 3543153323Srodrigc 3544153323Srodrigc /* 3545153323Srodrigc * It's up to our caller to purge the root 3546153323Srodrigc * and quota vnodes later. 3547153323Srodrigc */ 3548153323Srodrigc vp = XFS_ITOV_NULL(ip); 3549153323Srodrigc 3550153323Srodrigc if (!vp) { 3551153323Srodrigc XFS_MOUNT_IUNLOCK(mp); 3552153323Srodrigc xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); 3553153323Srodrigc purged = 1; 3554153323Srodrigc break; 3555153323Srodrigc } 3556153323Srodrigc 3557153323Srodrigc if (vn_count(vp) != 0) { 3558153323Srodrigc if (vn_count(vp) == 1 && 3559153323Srodrigc (ip == mp->m_rootip || 3560153323Srodrigc (mp->m_quotainfo && 3561153323Srodrigc (ip->i_ino == mp->m_sb.sb_uquotino || 3562153323Srodrigc ip->i_ino == mp->m_sb.sb_gquotino)))) { 3563153323Srodrigc ip = ip->i_mnext; 3564153323Srodrigc continue; 3565153323Srodrigc } 3566153323Srodrigc /* 3567153323Srodrigc * Ignore busy inodes but continue flushing 3568153323Srodrigc * others. 3569153323Srodrigc */ 3570153323Srodrigc ip = ip->i_mnext; 3571153323Srodrigc continue; 3572153323Srodrigc } 3573153323Srodrigc /* 3574153323Srodrigc * Sample vp mapping while holding mp locked on MP 3575153323Srodrigc * systems, so we don't purge a reclaimed or 3576153323Srodrigc * nonexistent vnode. We break from the loop 3577153323Srodrigc * since we know that we modify 3578153323Srodrigc * it by pulling ourselves from it in xfs_reclaim() 3579153323Srodrigc * called via vn_purge() below. Set ip to the next 3580153323Srodrigc * entry in the list anyway so we'll know below 3581153323Srodrigc * whether we reached the end or not. 3582153323Srodrigc */ 3583159451Srodrigc 3584153323Srodrigc XFS_MOUNT_IUNLOCK(mp); 3585159451Srodrigc vn_purge(vp); 3586153323Srodrigc purged = 1; 3587153323Srodrigc break; 3588153323Srodrigc } while (ip != mp->m_inodes); 3589153323Srodrigc /* 3590153323Srodrigc * We need to distinguish between when we exit the loop 3591153323Srodrigc * after a purge and when we simply hit the end of the 3592153323Srodrigc * list. We can't use the (ip == mp->m_inodes) test, 3593153323Srodrigc * because when we purge an inode at the start of the list 3594153323Srodrigc * the next inode on the list becomes mp->m_inodes. That 3595153323Srodrigc * would cause such a test to bail out early. The purged 3596153323Srodrigc * variable tells us how we got out of the loop. 3597153323Srodrigc */ 3598153323Srodrigc if (!purged) { 3599153323Srodrigc done = 1; 3600153323Srodrigc } 3601153323Srodrigc } 3602153323Srodrigc XFS_MOUNT_IUNLOCK(mp); 3603153323Srodrigc} 3604153323Srodrigc 3605153323Srodrigc/* 3606153323Srodrigc * xfs_iaccess: check accessibility of inode for mode. 3607159451Srodrigc * This function is quite linuxy now 3608159451Srodrigc * probably should be move to a os specfic location 3609153323Srodrigc */ 3610153323Srodrigcint 3611153323Srodrigcxfs_iaccess( 3612153323Srodrigc xfs_inode_t *ip, 3613184965Strasz accmode_t accmode, 3614153323Srodrigc cred_t *cr) 3615153323Srodrigc{ 3616153323Srodrigc xfs_vnode_t *vp; 3617153323Srodrigc int error; 3618159451Srodrigc 3619153323Srodrigc mode_t imode; 3620153323Srodrigc 3621153323Srodrigc vp = XFS_ITOV(ip); 3622159451Srodrigc /* FreeBSD local change here */ 3623159451Srodrigc imode = (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_vnode->v_type); 3624159451Srodrigc /* 3625159451Srodrigc * Verify that the MAC policy allows the requested access. 3626159451Srodrigc */ 3627184965Strasz if ((error = _MAC_XFS_IACCESS(ip, accmode, cr))) 3628159451Srodrigc return XFS_ERROR(error); 3629153323Srodrigc 3630184965Strasz if (accmode & VWRITE) { 3631153323Srodrigc xfs_mount_t *mp = ip->i_mount; 3632153323Srodrigc 3633153323Srodrigc if ((XVFSTOMNT(XFS_MTOVFS(mp))->mnt_flag & MNT_RDONLY) && 3634153323Srodrigc (S_ISREG(imode) || S_ISDIR(imode) || S_ISLNK(imode))) 3635153323Srodrigc return XFS_ERROR(EROFS); 3636153323Srodrigc 3637159147Simp#ifdef XXXKAN 3638153323Srodrigc if (IS_IMMUTABLE(inode)) 3639153323Srodrigc return XFS_ERROR(EACCES); 3640153323Srodrigc#endif 3641153323Srodrigc } 3642153323Srodrigc 3643153323Srodrigc /* 3644153323Srodrigc * If there's an Access Control List it's used instead of 3645153323Srodrigc * the mode bits. 3646153323Srodrigc */ 3647184965Strasz if ((error = _ACL_XFS_IACCESS(ip, accmode, cr)) != -1) 3648153323Srodrigc return error ? XFS_ERROR(error) : 0; 3649153323Srodrigc 3650153323Srodrigc 3651159451Srodrigc /* FreeBSD local change here */ 3652159451Srodrigc error = vaccess(vp->v_vnode->v_type, imode, ip->i_d.di_uid, ip->i_d.di_gid, 3653184965Strasz accmode, cr, NULL); 3654153323Srodrigc 3655153323Srodrigc return (error); 3656153323Srodrigc} 3657153323Srodrigc 3658153323Srodrigc/* 3659153323Srodrigc * xfs_iroundup: round up argument to next power of two 3660153323Srodrigc */ 3661153323Srodrigcuint 3662153323Srodrigcxfs_iroundup( 3663153323Srodrigc uint v) 3664153323Srodrigc{ 3665153323Srodrigc int i; 3666153323Srodrigc uint m; 3667153323Srodrigc 3668153323Srodrigc if ((v & (v - 1)) == 0) 3669153323Srodrigc return v; 3670153323Srodrigc ASSERT((v & 0x80000000) == 0); 3671153323Srodrigc if ((v & (v + 1)) == 0) 3672153323Srodrigc return v + 1; 3673153323Srodrigc for (i = 0, m = 1; i < 31; i++, m <<= 1) { 3674153323Srodrigc if (v & m) 3675153323Srodrigc continue; 3676153323Srodrigc v |= m; 3677153323Srodrigc if ((v & (v + 1)) == 0) 3678153323Srodrigc return v + 1; 3679153323Srodrigc } 3680153323Srodrigc ASSERT(0); 3681153323Srodrigc return( 0 ); 3682153323Srodrigc} 3683153323Srodrigc 3684159451Srodrigc#ifdef XFS_ILOCK_TRACE 3685159451Srodrigcktrace_t *xfs_ilock_trace_buf; 3686159451Srodrigc 3687159451Srodrigcvoid 3688159451Srodrigcxfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra) 3689159451Srodrigc{ 3690159451Srodrigc ktrace_enter(ip->i_lock_trace, 3691159451Srodrigc (void *)ip, 3692159451Srodrigc (void *)(unsigned long)lock, /* 1 = LOCK, 3=UNLOCK, etc */ 3693159451Srodrigc (void *)(unsigned long)lockflags, /* XFS_ILOCK_EXCL etc */ 3694159451Srodrigc (void *)ra, /* caller of ilock */ 3695159451Srodrigc (void *)(unsigned long)current_cpu(), 3696159451Srodrigc (void *)(unsigned long)current_pid(), 3697159451Srodrigc NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL); 3698159451Srodrigc} 3699159451Srodrigc#endif 3700159451Srodrigc 3701153323Srodrigc/* 3702159451Srodrigc * Return a pointer to the extent record at file index idx. 3703159451Srodrigc */ 3704159451Srodrigcxfs_bmbt_rec_t * 3705159451Srodrigcxfs_iext_get_ext( 3706159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 3707159451Srodrigc xfs_extnum_t idx) /* index of target extent */ 3708159451Srodrigc{ 3709159451Srodrigc ASSERT(idx >= 0); 3710159451Srodrigc if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { 3711159451Srodrigc return ifp->if_u1.if_ext_irec->er_extbuf; 3712159451Srodrigc } else if (ifp->if_flags & XFS_IFEXTIREC) { 3713159451Srodrigc xfs_ext_irec_t *erp; /* irec pointer */ 3714159451Srodrigc int erp_idx = 0; /* irec index */ 3715159451Srodrigc xfs_extnum_t page_idx = idx; /* ext index in target list */ 3716159451Srodrigc 3717159451Srodrigc erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 3718159451Srodrigc return &erp->er_extbuf[page_idx]; 3719159451Srodrigc } else if (ifp->if_bytes) { 3720159451Srodrigc return &ifp->if_u1.if_extents[idx]; 3721159451Srodrigc } else { 3722159451Srodrigc return NULL; 3723159451Srodrigc } 3724159451Srodrigc} 3725159451Srodrigc 3726159451Srodrigc/* 3727159451Srodrigc * Insert new item(s) into the extent records for incore inode 3728159451Srodrigc * fork 'ifp'. 'count' new items are inserted at index 'idx'. 3729159451Srodrigc */ 3730159451Srodrigcvoid 3731159451Srodrigcxfs_iext_insert( 3732159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 3733159451Srodrigc xfs_extnum_t idx, /* starting index of new items */ 3734159451Srodrigc xfs_extnum_t count, /* number of inserted items */ 3735159451Srodrigc xfs_bmbt_irec_t *new) /* items to insert */ 3736159451Srodrigc{ 3737159451Srodrigc xfs_bmbt_rec_t *ep; /* extent record pointer */ 3738159451Srodrigc xfs_extnum_t i; /* extent record index */ 3739159451Srodrigc 3740159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTENTS); 3741159451Srodrigc xfs_iext_add(ifp, idx, count); 3742159451Srodrigc for (i = idx; i < idx + count; i++, new++) { 3743159451Srodrigc ep = xfs_iext_get_ext(ifp, i); 3744159451Srodrigc xfs_bmbt_set_all(ep, new); 3745159451Srodrigc } 3746159451Srodrigc} 3747159451Srodrigc 3748159451Srodrigc/* 3749159451Srodrigc * This is called when the amount of space required for incore file 3750159451Srodrigc * extents needs to be increased. The ext_diff parameter stores the 3751159451Srodrigc * number of new extents being added and the idx parameter contains 3752159451Srodrigc * the extent index where the new extents will be added. If the new 3753159451Srodrigc * extents are being appended, then we just need to (re)allocate and 3754159451Srodrigc * initialize the space. Otherwise, if the new extents are being 3755159451Srodrigc * inserted into the middle of the existing entries, a bit more work 3756159451Srodrigc * is required to make room for the new extents to be inserted. The 3757159451Srodrigc * caller is responsible for filling in the new extent entries upon 3758159451Srodrigc * return. 3759159451Srodrigc */ 3760159451Srodrigcvoid 3761159451Srodrigcxfs_iext_add( 3762159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 3763159451Srodrigc xfs_extnum_t idx, /* index to begin adding exts */ 3764159451Srodrigc int ext_diff) /* number of extents to add */ 3765159451Srodrigc{ 3766159451Srodrigc int byte_diff; /* new bytes being added */ 3767159451Srodrigc int new_size; /* size of extents after adding */ 3768159451Srodrigc xfs_extnum_t nextents; /* number of extents in file */ 3769159451Srodrigc 3770159451Srodrigc nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3771159451Srodrigc ASSERT((idx >= 0) && (idx <= nextents)); 3772159451Srodrigc byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); 3773159451Srodrigc new_size = ifp->if_bytes + byte_diff; 3774159451Srodrigc /* 3775159451Srodrigc * If the new number of extents (nextents + ext_diff) 3776159451Srodrigc * fits inside the inode, then continue to use the inline 3777159451Srodrigc * extent buffer. 3778159451Srodrigc */ 3779159451Srodrigc if (nextents + ext_diff <= XFS_INLINE_EXTS) { 3780159451Srodrigc if (idx < nextents) { 3781159451Srodrigc memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], 3782159451Srodrigc &ifp->if_u2.if_inline_ext[idx], 3783159451Srodrigc (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 3784159451Srodrigc memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); 3785159451Srodrigc } 3786159451Srodrigc ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 3787159451Srodrigc ifp->if_real_bytes = 0; 3788159451Srodrigc ifp->if_lastex = nextents + ext_diff; 3789159451Srodrigc } 3790159451Srodrigc /* 3791159451Srodrigc * Otherwise use a linear (direct) extent list. 3792159451Srodrigc * If the extents are currently inside the inode, 3793159451Srodrigc * xfs_iext_realloc_direct will switch us from 3794159451Srodrigc * inline to direct extent allocation mode. 3795159451Srodrigc */ 3796159451Srodrigc else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { 3797159451Srodrigc xfs_iext_realloc_direct(ifp, new_size); 3798159451Srodrigc if (idx < nextents) { 3799159451Srodrigc memmove(&ifp->if_u1.if_extents[idx + ext_diff], 3800159451Srodrigc &ifp->if_u1.if_extents[idx], 3801159451Srodrigc (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 3802159451Srodrigc memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); 3803159451Srodrigc } 3804159451Srodrigc } 3805159451Srodrigc /* Indirection array */ 3806159451Srodrigc else { 3807159451Srodrigc xfs_ext_irec_t *erp; 3808159451Srodrigc int erp_idx = 0; 3809159451Srodrigc int page_idx = idx; 3810159451Srodrigc 3811159451Srodrigc ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); 3812159451Srodrigc if (ifp->if_flags & XFS_IFEXTIREC) { 3813159451Srodrigc erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); 3814159451Srodrigc } else { 3815159451Srodrigc xfs_iext_irec_init(ifp); 3816159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTIREC); 3817159451Srodrigc erp = ifp->if_u1.if_ext_irec; 3818159451Srodrigc } 3819159451Srodrigc /* Extents fit in target extent page */ 3820159451Srodrigc if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { 3821159451Srodrigc if (page_idx < erp->er_extcount) { 3822159451Srodrigc memmove(&erp->er_extbuf[page_idx + ext_diff], 3823159451Srodrigc &erp->er_extbuf[page_idx], 3824159451Srodrigc (erp->er_extcount - page_idx) * 3825159451Srodrigc sizeof(xfs_bmbt_rec_t)); 3826159451Srodrigc memset(&erp->er_extbuf[page_idx], 0, byte_diff); 3827159451Srodrigc } 3828159451Srodrigc erp->er_extcount += ext_diff; 3829159451Srodrigc xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 3830159451Srodrigc } 3831159451Srodrigc /* Insert a new extent page */ 3832159451Srodrigc else if (erp) { 3833159451Srodrigc xfs_iext_add_indirect_multi(ifp, 3834159451Srodrigc erp_idx, page_idx, ext_diff); 3835159451Srodrigc } 3836159451Srodrigc /* 3837159451Srodrigc * If extent(s) are being appended to the last page in 3838159451Srodrigc * the indirection array and the new extent(s) don't fit 3839159451Srodrigc * in the page, then erp is NULL and erp_idx is set to 3840159451Srodrigc * the next index needed in the indirection array. 3841159451Srodrigc */ 3842159451Srodrigc else { 3843159451Srodrigc int count = ext_diff; 3844159451Srodrigc 3845159451Srodrigc while (count) { 3846159451Srodrigc erp = xfs_iext_irec_new(ifp, erp_idx); 3847159451Srodrigc erp->er_extcount = count; 3848159451Srodrigc count -= MIN(count, (int)XFS_LINEAR_EXTS); 3849159451Srodrigc if (count) { 3850159451Srodrigc erp_idx++; 3851159451Srodrigc } 3852159451Srodrigc } 3853159451Srodrigc } 3854159451Srodrigc } 3855159451Srodrigc ifp->if_bytes = new_size; 3856159451Srodrigc} 3857159451Srodrigc 3858159451Srodrigc/* 3859159451Srodrigc * This is called when incore extents are being added to the indirection 3860159451Srodrigc * array and the new extents do not fit in the target extent list. The 3861159451Srodrigc * erp_idx parameter contains the irec index for the target extent list 3862159451Srodrigc * in the indirection array, and the idx parameter contains the extent 3863159451Srodrigc * index within the list. The number of extents being added is stored 3864159451Srodrigc * in the count parameter. 3865153323Srodrigc * 3866159451Srodrigc * |-------| |-------| 3867159451Srodrigc * | | | | idx - number of extents before idx 3868159451Srodrigc * | idx | | count | 3869159451Srodrigc * | | | | count - number of extents being inserted at idx 3870159451Srodrigc * |-------| |-------| 3871159451Srodrigc * | count | | nex2 | nex2 - number of extents after idx + count 3872159451Srodrigc * |-------| |-------| 3873153323Srodrigc */ 3874153323Srodrigcvoid 3875159451Srodrigcxfs_iext_add_indirect_multi( 3876159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 3877159451Srodrigc int erp_idx, /* target extent irec index */ 3878159451Srodrigc xfs_extnum_t idx, /* index within target list */ 3879159451Srodrigc int count) /* new extents being added */ 3880153323Srodrigc{ 3881159451Srodrigc int byte_diff; /* new bytes being added */ 3882159451Srodrigc xfs_ext_irec_t *erp; /* pointer to irec entry */ 3883159451Srodrigc xfs_extnum_t ext_diff; /* number of extents to add */ 3884159451Srodrigc xfs_extnum_t ext_cnt; /* new extents still needed */ 3885159451Srodrigc xfs_extnum_t nex2; /* extents after idx + count */ 3886159451Srodrigc xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ 3887159451Srodrigc int nlists; /* number of irec's (lists) */ 3888159451Srodrigc 3889159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTIREC); 3890159451Srodrigc erp = &ifp->if_u1.if_ext_irec[erp_idx]; 3891159451Srodrigc nex2 = erp->er_extcount - idx; 3892159451Srodrigc nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 3893159451Srodrigc 3894153323Srodrigc /* 3895159451Srodrigc * Save second part of target extent list 3896159451Srodrigc * (all extents past */ 3897159451Srodrigc if (nex2) { 3898159451Srodrigc byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 3899159451Srodrigc nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP); 3900159451Srodrigc memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); 3901159451Srodrigc erp->er_extcount -= nex2; 3902159451Srodrigc xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); 3903159451Srodrigc memset(&erp->er_extbuf[idx], 0, byte_diff); 3904159451Srodrigc } 3905159451Srodrigc 3906159451Srodrigc /* 3907159451Srodrigc * Add the new extents to the end of the target 3908159451Srodrigc * list, then allocate new irec record(s) and 3909159451Srodrigc * extent buffer(s) as needed to store the rest 3910159451Srodrigc * of the new extents. 3911153323Srodrigc */ 3912159451Srodrigc ext_cnt = count; 3913159451Srodrigc ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); 3914159451Srodrigc if (ext_diff) { 3915159451Srodrigc erp->er_extcount += ext_diff; 3916159451Srodrigc xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 3917159451Srodrigc ext_cnt -= ext_diff; 3918159451Srodrigc } 3919159451Srodrigc while (ext_cnt) { 3920159451Srodrigc erp_idx++; 3921159451Srodrigc erp = xfs_iext_irec_new(ifp, erp_idx); 3922159451Srodrigc ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); 3923159451Srodrigc erp->er_extcount = ext_diff; 3924159451Srodrigc xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 3925159451Srodrigc ext_cnt -= ext_diff; 3926159451Srodrigc } 3927159451Srodrigc 3928159451Srodrigc /* Add nex2 extents back to indirection array */ 3929159451Srodrigc if (nex2) { 3930159451Srodrigc xfs_extnum_t ext_avail; 3931159451Srodrigc int i; 3932159451Srodrigc 3933159451Srodrigc byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 3934159451Srodrigc ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; 3935159451Srodrigc i = 0; 3936159451Srodrigc /* 3937159451Srodrigc * If nex2 extents fit in the current page, append 3938159451Srodrigc * nex2_ep after the new extents. 3939159451Srodrigc */ 3940159451Srodrigc if (nex2 <= ext_avail) { 3941159451Srodrigc i = erp->er_extcount; 3942159451Srodrigc } 3943159451Srodrigc /* 3944159451Srodrigc * Otherwise, check if space is available in the 3945159451Srodrigc * next page. 3946159451Srodrigc */ 3947159451Srodrigc else if ((erp_idx < nlists - 1) && 3948159451Srodrigc (nex2 <= (ext_avail = XFS_LINEAR_EXTS - 3949159451Srodrigc ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { 3950159451Srodrigc erp_idx++; 3951159451Srodrigc erp++; 3952159451Srodrigc /* Create a hole for nex2 extents */ 3953159451Srodrigc memmove(&erp->er_extbuf[nex2], erp->er_extbuf, 3954159451Srodrigc erp->er_extcount * sizeof(xfs_bmbt_rec_t)); 3955159451Srodrigc } 3956159451Srodrigc /* 3957159451Srodrigc * Final choice, create a new extent page for 3958159451Srodrigc * nex2 extents. 3959159451Srodrigc */ 3960159451Srodrigc else { 3961159451Srodrigc erp_idx++; 3962159451Srodrigc erp = xfs_iext_irec_new(ifp, erp_idx); 3963159451Srodrigc } 3964159451Srodrigc memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); 3965159451Srodrigc kmem_free(nex2_ep, byte_diff); 3966159451Srodrigc erp->er_extcount += nex2; 3967159451Srodrigc xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); 3968159451Srodrigc } 3969159451Srodrigc} 3970159451Srodrigc 3971159451Srodrigc/* 3972159451Srodrigc * This is called when the amount of space required for incore file 3973159451Srodrigc * extents needs to be decreased. The ext_diff parameter stores the 3974159451Srodrigc * number of extents to be removed and the idx parameter contains 3975159451Srodrigc * the extent index where the extents will be removed from. 3976159451Srodrigc * 3977159451Srodrigc * If the amount of space needed has decreased below the linear 3978159451Srodrigc * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous 3979159451Srodrigc * extent array. Otherwise, use kmem_realloc() to adjust the 3980159451Srodrigc * size to what is needed. 3981159451Srodrigc */ 3982159451Srodrigcvoid 3983159451Srodrigcxfs_iext_remove( 3984159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 3985159451Srodrigc xfs_extnum_t idx, /* index to begin removing exts */ 3986159451Srodrigc int ext_diff) /* number of extents to remove */ 3987159451Srodrigc{ 3988159451Srodrigc xfs_extnum_t nextents; /* number of extents in file */ 3989159451Srodrigc int new_size; /* size of extents after removal */ 3990159451Srodrigc 3991159451Srodrigc ASSERT(ext_diff > 0); 3992159451Srodrigc nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 3993159451Srodrigc new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); 3994159451Srodrigc 3995159451Srodrigc if (new_size == 0) { 3996159451Srodrigc xfs_iext_destroy(ifp); 3997159451Srodrigc } else if (ifp->if_flags & XFS_IFEXTIREC) { 3998159451Srodrigc xfs_iext_remove_indirect(ifp, idx, ext_diff); 3999159451Srodrigc } else if (ifp->if_real_bytes) { 4000159451Srodrigc xfs_iext_remove_direct(ifp, idx, ext_diff); 4001159451Srodrigc } else { 4002159451Srodrigc xfs_iext_remove_inline(ifp, idx, ext_diff); 4003159451Srodrigc } 4004159451Srodrigc ifp->if_bytes = new_size; 4005159451Srodrigc} 4006159451Srodrigc 4007159451Srodrigc/* 4008159451Srodrigc * This removes ext_diff extents from the inline buffer, beginning 4009159451Srodrigc * at extent index idx. 4010159451Srodrigc */ 4011159451Srodrigcvoid 4012159451Srodrigcxfs_iext_remove_inline( 4013159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 4014159451Srodrigc xfs_extnum_t idx, /* index to begin removing exts */ 4015159451Srodrigc int ext_diff) /* number of extents to remove */ 4016159451Srodrigc{ 4017159451Srodrigc int nextents; /* number of extents in file */ 4018159451Srodrigc 4019159451Srodrigc ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 4020159451Srodrigc ASSERT(idx < XFS_INLINE_EXTS); 4021159451Srodrigc nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 4022159451Srodrigc ASSERT(((nextents - ext_diff) > 0) && 4023159451Srodrigc (nextents - ext_diff) < XFS_INLINE_EXTS); 4024159451Srodrigc 4025159451Srodrigc if (idx + ext_diff < nextents) { 4026159451Srodrigc memmove(&ifp->if_u2.if_inline_ext[idx], 4027159451Srodrigc &ifp->if_u2.if_inline_ext[idx + ext_diff], 4028159451Srodrigc (nextents - (idx + ext_diff)) * 4029159451Srodrigc sizeof(xfs_bmbt_rec_t)); 4030159451Srodrigc memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], 4031159451Srodrigc 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 4032159451Srodrigc } else { 4033159451Srodrigc memset(&ifp->if_u2.if_inline_ext[idx], 0, 4034159451Srodrigc ext_diff * sizeof(xfs_bmbt_rec_t)); 4035159451Srodrigc } 4036159451Srodrigc} 4037159451Srodrigc 4038159451Srodrigc/* 4039159451Srodrigc * This removes ext_diff extents from a linear (direct) extent list, 4040159451Srodrigc * beginning at extent index idx. If the extents are being removed 4041159451Srodrigc * from the end of the list (ie. truncate) then we just need to re- 4042159451Srodrigc * allocate the list to remove the extra space. Otherwise, if the 4043159451Srodrigc * extents are being removed from the middle of the existing extent 4044159451Srodrigc * entries, then we first need to move the extent records beginning 4045159451Srodrigc * at idx + ext_diff up in the list to overwrite the records being 4046159451Srodrigc * removed, then remove the extra space via kmem_realloc. 4047159451Srodrigc */ 4048159451Srodrigcvoid 4049159451Srodrigcxfs_iext_remove_direct( 4050159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 4051159451Srodrigc xfs_extnum_t idx, /* index to begin removing exts */ 4052159451Srodrigc int ext_diff) /* number of extents to remove */ 4053159451Srodrigc{ 4054159451Srodrigc xfs_extnum_t nextents; /* number of extents in file */ 4055159451Srodrigc int new_size; /* size of extents after removal */ 4056159451Srodrigc 4057159451Srodrigc ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 4058159451Srodrigc new_size = ifp->if_bytes - 4059159451Srodrigc (ext_diff * sizeof(xfs_bmbt_rec_t)); 4060159451Srodrigc nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 4061159451Srodrigc 4062159451Srodrigc if (new_size == 0) { 4063159451Srodrigc xfs_iext_destroy(ifp); 4064153323Srodrigc return; 4065159451Srodrigc } 4066159451Srodrigc /* Move extents up in the list (if needed) */ 4067159451Srodrigc if (idx + ext_diff < nextents) { 4068159451Srodrigc memmove(&ifp->if_u1.if_extents[idx], 4069159451Srodrigc &ifp->if_u1.if_extents[idx + ext_diff], 4070159451Srodrigc (nextents - (idx + ext_diff)) * 4071159451Srodrigc sizeof(xfs_bmbt_rec_t)); 4072159451Srodrigc } 4073159451Srodrigc memset(&ifp->if_u1.if_extents[nextents - ext_diff], 4074159451Srodrigc 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 4075159451Srodrigc /* 4076159451Srodrigc * Reallocate the direct extent list. If the extents 4077159451Srodrigc * will fit inside the inode then xfs_iext_realloc_direct 4078159451Srodrigc * will switch from direct to inline extent allocation 4079159451Srodrigc * mode for us. 4080159451Srodrigc */ 4081159451Srodrigc xfs_iext_realloc_direct(ifp, new_size); 4082159451Srodrigc ifp->if_bytes = new_size; 4083159451Srodrigc} 4084153323Srodrigc 4085159451Srodrigc/* 4086159451Srodrigc * This is called when incore extents are being removed from the 4087159451Srodrigc * indirection array and the extents being removed span multiple extent 4088159451Srodrigc * buffers. The idx parameter contains the file extent index where we 4089159451Srodrigc * want to begin removing extents, and the count parameter contains 4090159451Srodrigc * how many extents need to be removed. 4091159451Srodrigc * 4092159451Srodrigc * |-------| |-------| 4093159451Srodrigc * | nex1 | | | nex1 - number of extents before idx 4094159451Srodrigc * |-------| | count | 4095159451Srodrigc * | | | | count - number of extents being removed at idx 4096159451Srodrigc * | count | |-------| 4097159451Srodrigc * | | | nex2 | nex2 - number of extents after idx + count 4098159451Srodrigc * |-------| |-------| 4099159451Srodrigc */ 4100159451Srodrigcvoid 4101159451Srodrigcxfs_iext_remove_indirect( 4102159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 4103159451Srodrigc xfs_extnum_t idx, /* index to begin removing extents */ 4104159451Srodrigc int count) /* number of extents to remove */ 4105159451Srodrigc{ 4106159451Srodrigc xfs_ext_irec_t *erp; /* indirection array pointer */ 4107159451Srodrigc int erp_idx = 0; /* indirection array index */ 4108159451Srodrigc xfs_extnum_t ext_cnt; /* extents left to remove */ 4109159451Srodrigc xfs_extnum_t ext_diff; /* extents to remove in current list */ 4110159451Srodrigc xfs_extnum_t nex1; /* number of extents before idx */ 4111159451Srodrigc xfs_extnum_t nex2; /* extents after idx + count */ 4112159451Srodrigc int nlists; /* entries in indirection array */ 4113159451Srodrigc int page_idx = idx; /* index in target extent list */ 4114159451Srodrigc 4115159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4116159451Srodrigc erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 4117159451Srodrigc ASSERT(erp != NULL); 4118159451Srodrigc nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4119159451Srodrigc nex1 = page_idx; 4120159451Srodrigc ext_cnt = count; 4121159451Srodrigc while (ext_cnt) { 4122159451Srodrigc nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); 4123159451Srodrigc ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); 4124159451Srodrigc /* 4125159451Srodrigc * Check for deletion of entire list; 4126159451Srodrigc * xfs_iext_irec_remove() updates extent offsets. 4127159451Srodrigc */ 4128159451Srodrigc if (ext_diff == erp->er_extcount) { 4129159451Srodrigc xfs_iext_irec_remove(ifp, erp_idx); 4130159451Srodrigc ext_cnt -= ext_diff; 4131159451Srodrigc nex1 = 0; 4132159451Srodrigc if (ext_cnt) { 4133159451Srodrigc ASSERT(erp_idx < ifp->if_real_bytes / 4134159451Srodrigc XFS_IEXT_BUFSZ); 4135159451Srodrigc erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4136159451Srodrigc nex1 = 0; 4137159451Srodrigc continue; 4138159451Srodrigc } else { 4139159451Srodrigc break; 4140159451Srodrigc } 4141159451Srodrigc } 4142159451Srodrigc /* Move extents up (if needed) */ 4143159451Srodrigc if (nex2) { 4144159451Srodrigc memmove(&erp->er_extbuf[nex1], 4145159451Srodrigc &erp->er_extbuf[nex1 + ext_diff], 4146159451Srodrigc nex2 * sizeof(xfs_bmbt_rec_t)); 4147159451Srodrigc } 4148159451Srodrigc /* Zero out rest of page */ 4149159451Srodrigc memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - 4150159451Srodrigc ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); 4151159451Srodrigc /* Update remaining counters */ 4152159451Srodrigc erp->er_extcount -= ext_diff; 4153159451Srodrigc xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); 4154159451Srodrigc ext_cnt -= ext_diff; 4155159451Srodrigc nex1 = 0; 4156159451Srodrigc erp_idx++; 4157159451Srodrigc erp++; 4158159451Srodrigc } 4159159451Srodrigc ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); 4160159451Srodrigc xfs_iext_irec_compact(ifp); 4161159451Srodrigc} 4162159451Srodrigc 4163159451Srodrigc/* 4164159451Srodrigc * Create, destroy, or resize a linear (direct) block of extents. 4165159451Srodrigc */ 4166159451Srodrigcvoid 4167159451Srodrigcxfs_iext_realloc_direct( 4168159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 4169159451Srodrigc int new_size) /* new size of extents */ 4170159451Srodrigc{ 4171159451Srodrigc int rnew_size; /* real new size of extents */ 4172159451Srodrigc 4173159451Srodrigc rnew_size = new_size; 4174159451Srodrigc 4175159451Srodrigc ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || 4176159451Srodrigc ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && 4177159451Srodrigc (new_size != ifp->if_real_bytes))); 4178159451Srodrigc 4179159451Srodrigc /* Free extent records */ 4180159451Srodrigc if (new_size == 0) { 4181159451Srodrigc xfs_iext_destroy(ifp); 4182159451Srodrigc } 4183159451Srodrigc /* Resize direct extent list and zero any new bytes */ 4184159451Srodrigc else if (ifp->if_real_bytes) { 4185159451Srodrigc /* Check if extents will fit inside the inode */ 4186159451Srodrigc if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { 4187159451Srodrigc xfs_iext_direct_to_inline(ifp, new_size / 4188159451Srodrigc (uint)sizeof(xfs_bmbt_rec_t)); 4189159451Srodrigc ifp->if_bytes = new_size; 4190159451Srodrigc return; 4191159451Srodrigc } 4192159451Srodrigc if ((new_size & (new_size - 1)) != 0) { 4193159451Srodrigc rnew_size = xfs_iroundup(new_size); 4194159451Srodrigc } 4195159451Srodrigc if (rnew_size != ifp->if_real_bytes) { 4196159451Srodrigc ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) 4197159451Srodrigc kmem_realloc(ifp->if_u1.if_extents, 4198159451Srodrigc rnew_size, 4199159451Srodrigc ifp->if_real_bytes, 4200159451Srodrigc KM_SLEEP); 4201159451Srodrigc } 4202159451Srodrigc if (rnew_size > ifp->if_real_bytes) { 4203159451Srodrigc memset(&ifp->if_u1.if_extents[ifp->if_bytes / 4204159451Srodrigc (uint)sizeof(xfs_bmbt_rec_t)], 0, 4205159451Srodrigc rnew_size - ifp->if_real_bytes); 4206159451Srodrigc } 4207159451Srodrigc } 4208153323Srodrigc /* 4209159451Srodrigc * Switch from the inline extent buffer to a direct 4210159451Srodrigc * extent list. Be sure to include the inline extent 4211159451Srodrigc * bytes in new_size. 4212153323Srodrigc */ 4213159451Srodrigc else { 4214159451Srodrigc new_size += ifp->if_bytes; 4215159451Srodrigc if ((new_size & (new_size - 1)) != 0) { 4216159451Srodrigc rnew_size = xfs_iroundup(new_size); 4217159451Srodrigc } 4218159451Srodrigc xfs_iext_inline_to_direct(ifp, rnew_size); 4219159451Srodrigc } 4220159451Srodrigc ifp->if_real_bytes = rnew_size; 4221159451Srodrigc ifp->if_bytes = new_size; 4222159451Srodrigc} 4223153323Srodrigc 4224159451Srodrigc/* 4225159451Srodrigc * Switch from linear (direct) extent records to inline buffer. 4226159451Srodrigc */ 4227159451Srodrigcvoid 4228159451Srodrigcxfs_iext_direct_to_inline( 4229159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 4230159451Srodrigc xfs_extnum_t nextents) /* number of extents in file */ 4231159451Srodrigc{ 4232159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTENTS); 4233159451Srodrigc ASSERT(nextents <= XFS_INLINE_EXTS); 4234159451Srodrigc /* 4235159451Srodrigc * The inline buffer was zeroed when we switched 4236159451Srodrigc * from inline to direct extent allocation mode, 4237159451Srodrigc * so we don't need to clear it here. 4238159451Srodrigc */ 4239159451Srodrigc memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, 4240159451Srodrigc nextents * sizeof(xfs_bmbt_rec_t)); 4241159451Srodrigc kmem_free(ifp->if_u1.if_extents, KM_SLEEP); 4242159451Srodrigc ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 4243159451Srodrigc ifp->if_real_bytes = 0; 4244159451Srodrigc} 4245159451Srodrigc 4246159451Srodrigc/* 4247159451Srodrigc * Switch from inline buffer to linear (direct) extent records. 4248159451Srodrigc * new_size should already be rounded up to the next power of 2 4249159451Srodrigc * by the caller (when appropriate), so use new_size as it is. 4250159451Srodrigc * However, since new_size may be rounded up, we can't update 4251159451Srodrigc * if_bytes here. It is the caller's responsibility to update 4252159451Srodrigc * if_bytes upon return. 4253159451Srodrigc */ 4254159451Srodrigcvoid 4255159451Srodrigcxfs_iext_inline_to_direct( 4256159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 4257159451Srodrigc int new_size) /* number of extents in file */ 4258159451Srodrigc{ 4259159451Srodrigc ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) 4260159451Srodrigc kmem_alloc(new_size, KM_SLEEP); 4261159451Srodrigc memset(ifp->if_u1.if_extents, 0, new_size); 4262159451Srodrigc if (ifp->if_bytes) { 4263159451Srodrigc memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, 4264159451Srodrigc ifp->if_bytes); 4265159451Srodrigc memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 4266159451Srodrigc sizeof(xfs_bmbt_rec_t)); 4267153323Srodrigc } 4268159451Srodrigc ifp->if_real_bytes = new_size; 4269159451Srodrigc} 4270159451Srodrigc 4271159451Srodrigc/* 4272159451Srodrigc * Resize an extent indirection array to new_size bytes. 4273159451Srodrigc */ 4274159451Srodrigcvoid 4275159451Srodrigcxfs_iext_realloc_indirect( 4276159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 4277159451Srodrigc int new_size) /* new indirection array size */ 4278159451Srodrigc{ 4279159451Srodrigc int nlists; /* number of irec's (ex lists) */ 4280159451Srodrigc int size; /* current indirection array size */ 4281159451Srodrigc 4282159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4283159451Srodrigc nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4284159451Srodrigc size = nlists * sizeof(xfs_ext_irec_t); 4285159451Srodrigc ASSERT(ifp->if_real_bytes); 4286159451Srodrigc ASSERT((new_size >= 0) && (new_size != size)); 4287159451Srodrigc if (new_size == 0) { 4288159451Srodrigc xfs_iext_destroy(ifp); 4289159451Srodrigc } else { 4290159451Srodrigc ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) 4291159451Srodrigc kmem_realloc(ifp->if_u1.if_ext_irec, 4292159451Srodrigc new_size, size, KM_SLEEP); 4293153323Srodrigc } 4294159451Srodrigc} 4295159451Srodrigc 4296159451Srodrigc/* 4297159451Srodrigc * Switch from indirection array to linear (direct) extent allocations. 4298159451Srodrigc */ 4299159451Srodrigcvoid 4300159451Srodrigcxfs_iext_indirect_to_direct( 4301159451Srodrigc xfs_ifork_t *ifp) /* inode fork pointer */ 4302159451Srodrigc{ 4303159451Srodrigc xfs_bmbt_rec_t *ep; /* extent record pointer */ 4304159451Srodrigc xfs_extnum_t nextents; /* number of extents in file */ 4305159451Srodrigc int size; /* size of file extents */ 4306159451Srodrigc 4307159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4308159451Srodrigc nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 4309159451Srodrigc ASSERT(nextents <= XFS_LINEAR_EXTS); 4310159451Srodrigc size = nextents * sizeof(xfs_bmbt_rec_t); 4311159451Srodrigc 4312159451Srodrigc xfs_iext_irec_compact_full(ifp); 4313159451Srodrigc ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); 4314159451Srodrigc 4315159451Srodrigc ep = ifp->if_u1.if_ext_irec->er_extbuf; 4316159451Srodrigc kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t)); 4317159451Srodrigc ifp->if_flags &= ~XFS_IFEXTIREC; 4318159451Srodrigc ifp->if_u1.if_extents = ep; 4319159451Srodrigc ifp->if_bytes = size; 4320159451Srodrigc if (nextents < XFS_LINEAR_EXTS) { 4321159451Srodrigc xfs_iext_realloc_direct(ifp, size); 4322153323Srodrigc } 4323159451Srodrigc} 4324153323Srodrigc 4325159451Srodrigc/* 4326159451Srodrigc * Free incore file extents. 4327159451Srodrigc */ 4328159451Srodrigcvoid 4329159451Srodrigcxfs_iext_destroy( 4330159451Srodrigc xfs_ifork_t *ifp) /* inode fork pointer */ 4331159451Srodrigc{ 4332159451Srodrigc if (ifp->if_flags & XFS_IFEXTIREC) { 4333159451Srodrigc int erp_idx; 4334159451Srodrigc int nlists; 4335159451Srodrigc 4336159451Srodrigc nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4337159451Srodrigc for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { 4338159451Srodrigc xfs_iext_irec_remove(ifp, erp_idx); 4339159451Srodrigc } 4340159451Srodrigc ifp->if_flags &= ~XFS_IFEXTIREC; 4341159451Srodrigc } else if (ifp->if_real_bytes) { 4342159451Srodrigc kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); 4343159451Srodrigc } else if (ifp->if_bytes) { 4344159451Srodrigc memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 4345159451Srodrigc sizeof(xfs_bmbt_rec_t)); 4346159451Srodrigc } 4347159451Srodrigc ifp->if_u1.if_extents = NULL; 4348159451Srodrigc ifp->if_real_bytes = 0; 4349159451Srodrigc ifp->if_bytes = 0; 4350159451Srodrigc} 4351159451Srodrigc 4352159451Srodrigc/* 4353159451Srodrigc * Return a pointer to the extent record for file system block bno. 4354159451Srodrigc */ 4355159451Srodrigcxfs_bmbt_rec_t * /* pointer to found extent record */ 4356159451Srodrigcxfs_iext_bno_to_ext( 4357159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 4358159451Srodrigc xfs_fileoff_t bno, /* block number to search for */ 4359159451Srodrigc xfs_extnum_t *idxp) /* index of target extent */ 4360159451Srodrigc{ 4361159451Srodrigc xfs_bmbt_rec_t *base; /* pointer to first extent */ 4362159451Srodrigc xfs_filblks_t blockcount = 0; /* number of blocks in extent */ 4363159451Srodrigc xfs_bmbt_rec_t *ep = NULL; /* pointer to target extent */ 4364159451Srodrigc xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ 4365159451Srodrigc int high; /* upper boundary in search */ 4366159451Srodrigc xfs_extnum_t idx = 0; /* index of target extent */ 4367159451Srodrigc int low; /* lower boundary in search */ 4368159451Srodrigc xfs_extnum_t nextents; /* number of file extents */ 4369159451Srodrigc xfs_fileoff_t startoff = 0; /* start offset of extent */ 4370159451Srodrigc 4371159451Srodrigc nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 4372159451Srodrigc if (nextents == 0) { 4373159451Srodrigc *idxp = 0; 4374159451Srodrigc return NULL; 4375159451Srodrigc } 4376159451Srodrigc low = 0; 4377159451Srodrigc if (ifp->if_flags & XFS_IFEXTIREC) { 4378159451Srodrigc /* Find target extent list */ 4379159451Srodrigc int erp_idx = 0; 4380159451Srodrigc erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); 4381159451Srodrigc base = erp->er_extbuf; 4382159451Srodrigc high = erp->er_extcount - 1; 4383159451Srodrigc } else { 4384159451Srodrigc base = ifp->if_u1.if_extents; 4385159451Srodrigc high = nextents - 1; 4386159451Srodrigc } 4387159451Srodrigc /* Binary search extent records */ 4388159451Srodrigc while (low <= high) { 4389159451Srodrigc idx = (low + high) >> 1; 4390159451Srodrigc ep = base + idx; 4391159451Srodrigc startoff = xfs_bmbt_get_startoff(ep); 4392159451Srodrigc blockcount = xfs_bmbt_get_blockcount(ep); 4393159451Srodrigc if (bno < startoff) { 4394159451Srodrigc high = idx - 1; 4395159451Srodrigc } else if (bno >= startoff + blockcount) { 4396159451Srodrigc low = idx + 1; 4397159451Srodrigc } else { 4398159451Srodrigc /* Convert back to file-based extent index */ 4399159451Srodrigc if (ifp->if_flags & XFS_IFEXTIREC) { 4400159451Srodrigc idx += erp->er_extoff; 4401159451Srodrigc } 4402159451Srodrigc *idxp = idx; 4403159451Srodrigc return ep; 4404159451Srodrigc } 4405159451Srodrigc } 4406159451Srodrigc /* Convert back to file-based extent index */ 4407159451Srodrigc if (ifp->if_flags & XFS_IFEXTIREC) { 4408159451Srodrigc idx += erp->er_extoff; 4409159451Srodrigc } 4410159451Srodrigc if (bno >= startoff + blockcount) { 4411159451Srodrigc if (++idx == nextents) { 4412159451Srodrigc ep = NULL; 4413159451Srodrigc } else { 4414159451Srodrigc ep = xfs_iext_get_ext(ifp, idx); 4415159451Srodrigc } 4416159451Srodrigc } 4417159451Srodrigc *idxp = idx; 4418159451Srodrigc return ep; 4419159451Srodrigc} 4420159451Srodrigc 4421159451Srodrigc/* 4422159451Srodrigc * Return a pointer to the indirection array entry containing the 4423159451Srodrigc * extent record for filesystem block bno. Store the index of the 4424159451Srodrigc * target irec in *erp_idxp. 4425159451Srodrigc */ 4426159451Srodrigcxfs_ext_irec_t * /* pointer to found extent record */ 4427159451Srodrigcxfs_iext_bno_to_irec( 4428159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 4429159451Srodrigc xfs_fileoff_t bno, /* block number to search for */ 4430159451Srodrigc int *erp_idxp) /* irec index of target ext list */ 4431159451Srodrigc{ 4432159451Srodrigc xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ 4433159451Srodrigc xfs_ext_irec_t *erp_next; /* next indirection array entry */ 4434159451Srodrigc int erp_idx; /* indirection array index */ 4435159451Srodrigc int nlists; /* number of extent irec's (lists) */ 4436159451Srodrigc int high; /* binary search upper limit */ 4437159451Srodrigc int low; /* binary search lower limit */ 4438159451Srodrigc 4439159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4440159451Srodrigc nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4441159451Srodrigc erp_idx = 0; 4442159451Srodrigc low = 0; 4443159451Srodrigc high = nlists - 1; 4444159451Srodrigc while (low <= high) { 4445159451Srodrigc erp_idx = (low + high) >> 1; 4446159451Srodrigc erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4447159451Srodrigc erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL; 4448159451Srodrigc if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { 4449159451Srodrigc high = erp_idx - 1; 4450159451Srodrigc } else if (erp_next && bno >= 4451159451Srodrigc xfs_bmbt_get_startoff(erp_next->er_extbuf)) { 4452159451Srodrigc low = erp_idx + 1; 4453159451Srodrigc } else { 4454159451Srodrigc break; 4455159451Srodrigc } 4456159451Srodrigc } 4457159451Srodrigc *erp_idxp = erp_idx; 4458159451Srodrigc return erp; 4459159451Srodrigc} 4460159451Srodrigc 4461159451Srodrigc/* 4462159451Srodrigc * Return a pointer to the indirection array entry containing the 4463159451Srodrigc * extent record at file extent index *idxp. Store the index of the 4464159451Srodrigc * target irec in *erp_idxp and store the page index of the target 4465159451Srodrigc * extent record in *idxp. 4466159451Srodrigc */ 4467159451Srodrigcxfs_ext_irec_t * 4468159451Srodrigcxfs_iext_idx_to_irec( 4469159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 4470159451Srodrigc xfs_extnum_t *idxp, /* extent index (file -> page) */ 4471159451Srodrigc int *erp_idxp, /* pointer to target irec */ 4472159451Srodrigc int realloc) /* new bytes were just added */ 4473159451Srodrigc{ 4474159451Srodrigc xfs_ext_irec_t *prev; /* pointer to previous irec */ 4475159451Srodrigc xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ 4476159451Srodrigc int erp_idx; /* indirection array index */ 4477159451Srodrigc int nlists; /* number of irec's (ex lists) */ 4478159451Srodrigc int high; /* binary search upper limit */ 4479159451Srodrigc int low; /* binary search lower limit */ 4480159451Srodrigc xfs_extnum_t page_idx = *idxp; /* extent index in target list */ 4481159451Srodrigc 4482159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4483159451Srodrigc ASSERT(page_idx >= 0 && page_idx <= 4484159451Srodrigc ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); 4485159451Srodrigc nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4486159451Srodrigc erp_idx = 0; 4487159451Srodrigc low = 0; 4488159451Srodrigc high = nlists - 1; 4489159451Srodrigc 4490159451Srodrigc /* Binary search extent irec's */ 4491159451Srodrigc while (low <= high) { 4492159451Srodrigc erp_idx = (low + high) >> 1; 4493159451Srodrigc erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4494159451Srodrigc prev = erp_idx > 0 ? erp - 1 : NULL; 4495159451Srodrigc if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && 4496159451Srodrigc realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { 4497159451Srodrigc high = erp_idx - 1; 4498159451Srodrigc } else if (page_idx > erp->er_extoff + erp->er_extcount || 4499159451Srodrigc (page_idx == erp->er_extoff + erp->er_extcount && 4500159451Srodrigc !realloc)) { 4501159451Srodrigc low = erp_idx + 1; 4502159451Srodrigc } else if (page_idx == erp->er_extoff + erp->er_extcount && 4503159451Srodrigc erp->er_extcount == XFS_LINEAR_EXTS) { 4504159451Srodrigc ASSERT(realloc); 4505159451Srodrigc page_idx = 0; 4506159451Srodrigc erp_idx++; 4507159451Srodrigc erp = erp_idx < nlists ? erp + 1 : NULL; 4508159451Srodrigc break; 4509159451Srodrigc } else { 4510159451Srodrigc page_idx -= erp->er_extoff; 4511159451Srodrigc break; 4512159451Srodrigc } 4513159451Srodrigc } 4514159451Srodrigc *idxp = page_idx; 4515159451Srodrigc *erp_idxp = erp_idx; 4516159451Srodrigc return(erp); 4517159451Srodrigc} 4518159451Srodrigc 4519159451Srodrigc/* 4520159451Srodrigc * Allocate and initialize an indirection array once the space needed 4521159451Srodrigc * for incore extents increases above XFS_IEXT_BUFSZ. 4522159451Srodrigc */ 4523159451Srodrigcvoid 4524159451Srodrigcxfs_iext_irec_init( 4525159451Srodrigc xfs_ifork_t *ifp) /* inode fork pointer */ 4526159451Srodrigc{ 4527159451Srodrigc xfs_ext_irec_t *erp; /* indirection array pointer */ 4528159451Srodrigc xfs_extnum_t nextents; /* number of extents in file */ 4529159451Srodrigc 4530159451Srodrigc ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 4531159451Srodrigc nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 4532159451Srodrigc ASSERT(nextents <= XFS_LINEAR_EXTS); 4533159451Srodrigc 4534159451Srodrigc erp = (xfs_ext_irec_t *) 4535159451Srodrigc kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP); 4536159451Srodrigc 4537159451Srodrigc if (nextents == 0) { 4538159451Srodrigc ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) 4539159451Srodrigc kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); 4540159451Srodrigc } else if (!ifp->if_real_bytes) { 4541159451Srodrigc xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); 4542159451Srodrigc } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { 4543159451Srodrigc xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); 4544159451Srodrigc } 4545159451Srodrigc erp->er_extbuf = ifp->if_u1.if_extents; 4546159451Srodrigc erp->er_extcount = nextents; 4547159451Srodrigc erp->er_extoff = 0; 4548159451Srodrigc 4549159451Srodrigc ifp->if_flags |= XFS_IFEXTIREC; 4550159451Srodrigc ifp->if_real_bytes = XFS_IEXT_BUFSZ; 4551159451Srodrigc ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); 4552159451Srodrigc ifp->if_u1.if_ext_irec = erp; 4553159451Srodrigc 4554159451Srodrigc return; 4555159451Srodrigc} 4556159451Srodrigc 4557159451Srodrigc/* 4558159451Srodrigc * Allocate and initialize a new entry in the indirection array. 4559159451Srodrigc */ 4560159451Srodrigcxfs_ext_irec_t * 4561159451Srodrigcxfs_iext_irec_new( 4562159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 4563159451Srodrigc int erp_idx) /* index for new irec */ 4564159451Srodrigc{ 4565159451Srodrigc xfs_ext_irec_t *erp; /* indirection array pointer */ 4566159451Srodrigc int i; /* loop counter */ 4567159451Srodrigc int nlists; /* number of irec's (ex lists) */ 4568159451Srodrigc 4569159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4570159451Srodrigc nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4571159451Srodrigc 4572159451Srodrigc /* Resize indirection array */ 4573159451Srodrigc xfs_iext_realloc_indirect(ifp, ++nlists * 4574159451Srodrigc sizeof(xfs_ext_irec_t)); 4575153323Srodrigc /* 4576159451Srodrigc * Move records down in the array so the 4577159451Srodrigc * new page can use erp_idx. 4578153323Srodrigc */ 4579159451Srodrigc erp = ifp->if_u1.if_ext_irec; 4580159451Srodrigc for (i = nlists - 1; i > erp_idx; i--) { 4581159451Srodrigc memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); 4582159451Srodrigc } 4583159451Srodrigc ASSERT(i == erp_idx); 4584153323Srodrigc 4585159451Srodrigc /* Initialize new extent record */ 4586159451Srodrigc erp = ifp->if_u1.if_ext_irec; 4587159451Srodrigc erp[erp_idx].er_extbuf = (xfs_bmbt_rec_t *) 4588159451Srodrigc kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); 4589159451Srodrigc ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 4590159451Srodrigc memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); 4591159451Srodrigc erp[erp_idx].er_extcount = 0; 4592159451Srodrigc erp[erp_idx].er_extoff = erp_idx > 0 ? 4593159451Srodrigc erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; 4594159451Srodrigc return (&erp[erp_idx]); 4595153323Srodrigc} 4596153323Srodrigc 4597159451Srodrigc/* 4598159451Srodrigc * Remove a record from the indirection array. 4599159451Srodrigc */ 4600159451Srodrigcvoid 4601159451Srodrigcxfs_iext_irec_remove( 4602159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 4603159451Srodrigc int erp_idx) /* irec index to remove */ 4604159451Srodrigc{ 4605159451Srodrigc xfs_ext_irec_t *erp; /* indirection array pointer */ 4606159451Srodrigc int i; /* loop counter */ 4607159451Srodrigc int nlists; /* number of irec's (ex lists) */ 4608153323Srodrigc 4609159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4610159451Srodrigc nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4611159451Srodrigc erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4612159451Srodrigc if (erp->er_extbuf) { 4613159451Srodrigc xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, 4614159451Srodrigc -erp->er_extcount); 4615159451Srodrigc kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ); 4616159451Srodrigc } 4617159451Srodrigc /* Compact extent records */ 4618159451Srodrigc erp = ifp->if_u1.if_ext_irec; 4619159451Srodrigc for (i = erp_idx; i < nlists - 1; i++) { 4620159451Srodrigc memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); 4621159451Srodrigc } 4622159451Srodrigc /* 4623159451Srodrigc * Manually free the last extent record from the indirection 4624159451Srodrigc * array. A call to xfs_iext_realloc_indirect() with a size 4625159451Srodrigc * of zero would result in a call to xfs_iext_destroy() which 4626159451Srodrigc * would in turn call this function again, creating a nasty 4627159451Srodrigc * infinite loop. 4628159451Srodrigc */ 4629159451Srodrigc if (--nlists) { 4630159451Srodrigc xfs_iext_realloc_indirect(ifp, 4631159451Srodrigc nlists * sizeof(xfs_ext_irec_t)); 4632159451Srodrigc } else { 4633159451Srodrigc kmem_free(ifp->if_u1.if_ext_irec, 4634159451Srodrigc sizeof(xfs_ext_irec_t)); 4635159451Srodrigc } 4636159451Srodrigc ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 4637159451Srodrigc} 4638159451Srodrigc 4639159451Srodrigc/* 4640159451Srodrigc * This is called to clean up large amounts of unused memory allocated 4641159451Srodrigc * by the indirection array. Before compacting anything though, verify 4642159451Srodrigc * that the indirection array is still needed and switch back to the 4643159451Srodrigc * linear extent list (or even the inline buffer) if possible. The 4644159451Srodrigc * compaction policy is as follows: 4645159451Srodrigc * 4646159451Srodrigc * Full Compaction: Extents fit into a single page (or inline buffer) 4647159451Srodrigc * Full Compaction: Extents occupy less than 10% of allocated space 4648159451Srodrigc * Partial Compaction: Extents occupy > 10% and < 50% of allocated space 4649159451Srodrigc * No Compaction: Extents occupy at least 50% of allocated space 4650159451Srodrigc */ 4651153323Srodrigcvoid 4652159451Srodrigcxfs_iext_irec_compact( 4653159451Srodrigc xfs_ifork_t *ifp) /* inode fork pointer */ 4654153323Srodrigc{ 4655159451Srodrigc xfs_extnum_t nextents; /* number of extents in file */ 4656159451Srodrigc int nlists; /* number of irec's (ex lists) */ 4657159451Srodrigc 4658159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4659159451Srodrigc nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4660159451Srodrigc nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 4661159451Srodrigc 4662159451Srodrigc if (nextents == 0) { 4663159451Srodrigc xfs_iext_destroy(ifp); 4664159451Srodrigc } else if (nextents <= XFS_INLINE_EXTS) { 4665159451Srodrigc xfs_iext_indirect_to_direct(ifp); 4666159451Srodrigc xfs_iext_direct_to_inline(ifp, nextents); 4667159451Srodrigc } else if (nextents <= XFS_LINEAR_EXTS) { 4668159451Srodrigc xfs_iext_indirect_to_direct(ifp); 4669159451Srodrigc } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) { 4670159451Srodrigc xfs_iext_irec_compact_full(ifp); 4671159451Srodrigc } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { 4672159451Srodrigc xfs_iext_irec_compact_pages(ifp); 4673159451Srodrigc } 4674153323Srodrigc} 4675159451Srodrigc 4676159451Srodrigc/* 4677159451Srodrigc * Combine extents from neighboring extent pages. 4678159451Srodrigc */ 4679159451Srodrigcvoid 4680159451Srodrigcxfs_iext_irec_compact_pages( 4681159451Srodrigc xfs_ifork_t *ifp) /* inode fork pointer */ 4682159451Srodrigc{ 4683159451Srodrigc xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ 4684159451Srodrigc int erp_idx = 0; /* indirection array index */ 4685159451Srodrigc int nlists; /* number of irec's (ex lists) */ 4686159451Srodrigc 4687159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4688159451Srodrigc nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4689159451Srodrigc while (erp_idx < nlists - 1) { 4690159451Srodrigc erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4691159451Srodrigc erp_next = erp + 1; 4692159451Srodrigc if (erp_next->er_extcount <= 4693159451Srodrigc (XFS_LINEAR_EXTS - erp->er_extcount)) { 4694159451Srodrigc memmove(&erp->er_extbuf[erp->er_extcount], 4695159451Srodrigc erp_next->er_extbuf, erp_next->er_extcount * 4696159451Srodrigc sizeof(xfs_bmbt_rec_t)); 4697159451Srodrigc erp->er_extcount += erp_next->er_extcount; 4698159451Srodrigc /* 4699159451Srodrigc * Free page before removing extent record 4700159451Srodrigc * so er_extoffs don't get modified in 4701159451Srodrigc * xfs_iext_irec_remove. 4702159451Srodrigc */ 4703159451Srodrigc kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ); 4704159451Srodrigc erp_next->er_extbuf = NULL; 4705159451Srodrigc xfs_iext_irec_remove(ifp, erp_idx + 1); 4706159451Srodrigc nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4707159451Srodrigc } else { 4708159451Srodrigc erp_idx++; 4709159451Srodrigc } 4710159451Srodrigc } 4711159451Srodrigc} 4712159451Srodrigc 4713159451Srodrigc/* 4714159451Srodrigc * Fully compact the extent records managed by the indirection array. 4715159451Srodrigc */ 4716159451Srodrigcvoid 4717159451Srodrigcxfs_iext_irec_compact_full( 4718159451Srodrigc xfs_ifork_t *ifp) /* inode fork pointer */ 4719159451Srodrigc{ 4720159451Srodrigc xfs_bmbt_rec_t *ep, *ep_next; /* extent record pointers */ 4721159451Srodrigc xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */ 4722159451Srodrigc int erp_idx = 0; /* extent irec index */ 4723159451Srodrigc int ext_avail; /* empty entries in ex list */ 4724159451Srodrigc int ext_diff; /* number of exts to add */ 4725159451Srodrigc int nlists; /* number of irec's (ex lists) */ 4726159451Srodrigc 4727159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4728159451Srodrigc nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4729159451Srodrigc erp = ifp->if_u1.if_ext_irec; 4730159451Srodrigc ep = &erp->er_extbuf[erp->er_extcount]; 4731159451Srodrigc erp_next = erp + 1; 4732159451Srodrigc ep_next = erp_next->er_extbuf; 4733159451Srodrigc while (erp_idx < nlists - 1) { 4734159451Srodrigc ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; 4735159451Srodrigc ext_diff = MIN(ext_avail, erp_next->er_extcount); 4736159451Srodrigc memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); 4737159451Srodrigc erp->er_extcount += ext_diff; 4738159451Srodrigc erp_next->er_extcount -= ext_diff; 4739159451Srodrigc /* Remove next page */ 4740159451Srodrigc if (erp_next->er_extcount == 0) { 4741159451Srodrigc /* 4742159451Srodrigc * Free page before removing extent record 4743159451Srodrigc * so er_extoffs don't get modified in 4744159451Srodrigc * xfs_iext_irec_remove. 4745159451Srodrigc */ 4746159451Srodrigc kmem_free(erp_next->er_extbuf, 4747159451Srodrigc erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); 4748159451Srodrigc erp_next->er_extbuf = NULL; 4749159451Srodrigc xfs_iext_irec_remove(ifp, erp_idx + 1); 4750159451Srodrigc erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4751159451Srodrigc nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4752159451Srodrigc /* Update next page */ 4753159451Srodrigc } else { 4754159451Srodrigc /* Move rest of page up to become next new page */ 4755159451Srodrigc memmove(erp_next->er_extbuf, ep_next, 4756159451Srodrigc erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); 4757159451Srodrigc ep_next = erp_next->er_extbuf; 4758159451Srodrigc memset(&ep_next[erp_next->er_extcount], 0, 4759159451Srodrigc (XFS_LINEAR_EXTS - erp_next->er_extcount) * 4760159451Srodrigc sizeof(xfs_bmbt_rec_t)); 4761159451Srodrigc } 4762159451Srodrigc if (erp->er_extcount == XFS_LINEAR_EXTS) { 4763159451Srodrigc erp_idx++; 4764159451Srodrigc if (erp_idx < nlists) 4765159451Srodrigc erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4766159451Srodrigc else 4767159451Srodrigc break; 4768159451Srodrigc } 4769159451Srodrigc ep = &erp->er_extbuf[erp->er_extcount]; 4770159451Srodrigc erp_next = erp + 1; 4771159451Srodrigc ep_next = erp_next->er_extbuf; 4772159451Srodrigc } 4773159451Srodrigc} 4774159451Srodrigc 4775159451Srodrigc/* 4776159451Srodrigc * This is called to update the er_extoff field in the indirection 4777159451Srodrigc * array when extents have been added or removed from one of the 4778159451Srodrigc * extent lists. erp_idx contains the irec index to begin updating 4779159451Srodrigc * at and ext_diff contains the number of extents that were added 4780159451Srodrigc * or removed. 4781159451Srodrigc */ 4782159451Srodrigcvoid 4783159451Srodrigcxfs_iext_irec_update_extoffs( 4784159451Srodrigc xfs_ifork_t *ifp, /* inode fork pointer */ 4785159451Srodrigc int erp_idx, /* irec index to update */ 4786159451Srodrigc int ext_diff) /* number of new extents */ 4787159451Srodrigc{ 4788159451Srodrigc int i; /* loop counter */ 4789159451Srodrigc int nlists; /* number of irec's (ex lists */ 4790159451Srodrigc 4791159451Srodrigc ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4792159451Srodrigc nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4793159451Srodrigc for (i = erp_idx; i < nlists; i++) { 4794159451Srodrigc ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; 4795159451Srodrigc } 4796159451Srodrigc} 4797