1153323Srodrigc/* 2159451Srodrigc * Copyright (c) 2000-2005 Silicon Graphics, Inc. 3159451Srodrigc * All Rights Reserved. 4153323Srodrigc * 5159451Srodrigc * This program is free software; you can redistribute it and/or 6159451Srodrigc * modify it under the terms of the GNU General Public License as 7153323Srodrigc * published by the Free Software Foundation. 8153323Srodrigc * 9159451Srodrigc * This program is distributed in the hope that it would be useful, 10159451Srodrigc * but WITHOUT ANY WARRANTY; without even the implied warranty of 11159451Srodrigc * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12159451Srodrigc * GNU General Public License for more details. 13153323Srodrigc * 14159451Srodrigc * You should have received a copy of the GNU General Public License 15159451Srodrigc * along with this program; if not, write the Free Software Foundation, 16159451Srodrigc * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17153323Srodrigc */ 18153323Srodrigc#include "xfs.h" 19159451Srodrigc#include "xfs_fs.h" 20153323Srodrigc#include "xfs_types.h" 21159451Srodrigc#include "xfs_bit.h" 22159451Srodrigc#include "xfs_log.h" 23153323Srodrigc#include "xfs_inum.h" 24153323Srodrigc#include "xfs_trans.h" 25153323Srodrigc#include "xfs_sb.h" 26153323Srodrigc#include "xfs_ag.h" 27153323Srodrigc#include "xfs_dir.h" 28153323Srodrigc#include "xfs_dir2.h" 29153323Srodrigc#include "xfs_dmapi.h" 30153323Srodrigc#include "xfs_mount.h" 31159451Srodrigc#include "xfs_bmap_btree.h" 32153323Srodrigc#include "xfs_alloc_btree.h" 33153323Srodrigc#include "xfs_ialloc_btree.h" 34153323Srodrigc#include "xfs_dir_sf.h" 35153323Srodrigc#include "xfs_dir2_sf.h" 36159451Srodrigc#include "xfs_attr_sf.h" 37153323Srodrigc#include "xfs_dinode.h" 38153323Srodrigc#include "xfs_inode.h" 39159451Srodrigc#include "xfs_btree.h" 40159451Srodrigc#include "xfs_ialloc.h" 41153323Srodrigc#include "xfs_alloc.h" 42153323Srodrigc#include "xfs_rtalloc.h" 43153323Srodrigc#include "xfs_bmap.h" 
44153323Srodrigc#include "xfs_error.h" 45153323Srodrigc#include "xfs_rw.h" 46153323Srodrigc#include "xfs_quota.h" 47153323Srodrigc#include "xfs_fsops.h" 48153323Srodrigc 49153323SrodrigcSTATIC void xfs_mount_log_sbunit(xfs_mount_t *, __int64_t); 50153323SrodrigcSTATIC int xfs_uuid_mount(xfs_mount_t *); 51153323SrodrigcSTATIC void xfs_uuid_unmount(xfs_mount_t *mp); 52159451SrodrigcSTATIC void xfs_unmountfs_wait(xfs_mount_t *); 53153323Srodrigc 54153323Srodrigc 55159451Srodrigc#ifdef HAVE_PERCPU_SB 56159451SrodrigcSTATIC void xfs_icsb_destroy_counters(xfs_mount_t *); 57159451SrodrigcSTATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int); 58159451SrodrigcSTATIC void xfs_icsb_sync_counters(xfs_mount_t *); 59159451SrodrigcSTATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, 60159451Srodrigc int, int); 61159451SrodrigcSTATIC int xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t, 62159451Srodrigc int, int); 63159451SrodrigcSTATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); 64153323Srodrigc 65159451Srodrigc#else 66159451Srodrigc 67159451Srodrigc#define xfs_icsb_destroy_counters(mp) do { } while (0) 68159451Srodrigc#define xfs_icsb_balance_counter(mp, a, b) do { } while (0) 69159451Srodrigc#define xfs_icsb_sync_counters(mp) do { } while (0) 70159451Srodrigc#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) 71159451Srodrigc#define xfs_icsb_modify_counters_locked(mp, a, b, c) do { } while (0) 72159451Srodrigc 73159451Srodrigc#endif 74159451Srodrigc 75159451Srodrigcstatic const struct { 76159451Srodrigc short offset; 77159451Srodrigc short type; /* 0 = integer 78159451Srodrigc * 1 = binary / string (no translation) 79159451Srodrigc */ 80153323Srodrigc} xfs_sb_info[] = { 81153323Srodrigc { offsetof(xfs_sb_t, sb_magicnum), 0 }, 82153323Srodrigc { offsetof(xfs_sb_t, sb_blocksize), 0 }, 83153323Srodrigc { offsetof(xfs_sb_t, sb_dblocks), 0 }, 84153323Srodrigc { offsetof(xfs_sb_t, sb_rblocks), 0 }, 
85153323Srodrigc { offsetof(xfs_sb_t, sb_rextents), 0 }, 86153323Srodrigc { offsetof(xfs_sb_t, sb_uuid), 1 }, 87153323Srodrigc { offsetof(xfs_sb_t, sb_logstart), 0 }, 88153323Srodrigc { offsetof(xfs_sb_t, sb_rootino), 0 }, 89153323Srodrigc { offsetof(xfs_sb_t, sb_rbmino), 0 }, 90153323Srodrigc { offsetof(xfs_sb_t, sb_rsumino), 0 }, 91153323Srodrigc { offsetof(xfs_sb_t, sb_rextsize), 0 }, 92153323Srodrigc { offsetof(xfs_sb_t, sb_agblocks), 0 }, 93153323Srodrigc { offsetof(xfs_sb_t, sb_agcount), 0 }, 94153323Srodrigc { offsetof(xfs_sb_t, sb_rbmblocks), 0 }, 95153323Srodrigc { offsetof(xfs_sb_t, sb_logblocks), 0 }, 96153323Srodrigc { offsetof(xfs_sb_t, sb_versionnum), 0 }, 97153323Srodrigc { offsetof(xfs_sb_t, sb_sectsize), 0 }, 98153323Srodrigc { offsetof(xfs_sb_t, sb_inodesize), 0 }, 99153323Srodrigc { offsetof(xfs_sb_t, sb_inopblock), 0 }, 100153323Srodrigc { offsetof(xfs_sb_t, sb_fname[0]), 1 }, 101153323Srodrigc { offsetof(xfs_sb_t, sb_blocklog), 0 }, 102153323Srodrigc { offsetof(xfs_sb_t, sb_sectlog), 0 }, 103153323Srodrigc { offsetof(xfs_sb_t, sb_inodelog), 0 }, 104153323Srodrigc { offsetof(xfs_sb_t, sb_inopblog), 0 }, 105153323Srodrigc { offsetof(xfs_sb_t, sb_agblklog), 0 }, 106153323Srodrigc { offsetof(xfs_sb_t, sb_rextslog), 0 }, 107153323Srodrigc { offsetof(xfs_sb_t, sb_inprogress), 0 }, 108153323Srodrigc { offsetof(xfs_sb_t, sb_imax_pct), 0 }, 109153323Srodrigc { offsetof(xfs_sb_t, sb_icount), 0 }, 110153323Srodrigc { offsetof(xfs_sb_t, sb_ifree), 0 }, 111153323Srodrigc { offsetof(xfs_sb_t, sb_fdblocks), 0 }, 112153323Srodrigc { offsetof(xfs_sb_t, sb_frextents), 0 }, 113153323Srodrigc { offsetof(xfs_sb_t, sb_uquotino), 0 }, 114153323Srodrigc { offsetof(xfs_sb_t, sb_gquotino), 0 }, 115153323Srodrigc { offsetof(xfs_sb_t, sb_qflags), 0 }, 116153323Srodrigc { offsetof(xfs_sb_t, sb_flags), 0 }, 117153323Srodrigc { offsetof(xfs_sb_t, sb_shared_vn), 0 }, 118153323Srodrigc { offsetof(xfs_sb_t, sb_inoalignmt), 0 }, 119153323Srodrigc { offsetof(xfs_sb_t, sb_unit), 0 
}, 120153323Srodrigc { offsetof(xfs_sb_t, sb_width), 0 }, 121153323Srodrigc { offsetof(xfs_sb_t, sb_dirblklog), 0 }, 122153323Srodrigc { offsetof(xfs_sb_t, sb_logsectlog), 0 }, 123153323Srodrigc { offsetof(xfs_sb_t, sb_logsectsize),0 }, 124153323Srodrigc { offsetof(xfs_sb_t, sb_logsunit), 0 }, 125159451Srodrigc { offsetof(xfs_sb_t, sb_features2), 0 }, 126153323Srodrigc { sizeof(xfs_sb_t), 0 } 127153323Srodrigc}; 128153323Srodrigc 129153323Srodrigc/* 130153323Srodrigc * Return a pointer to an initialized xfs_mount structure. 131153323Srodrigc */ 132153323Srodrigcxfs_mount_t * 133153323Srodrigcxfs_mount_init(void) 134153323Srodrigc{ 135153323Srodrigc xfs_mount_t *mp; 136153323Srodrigc 137159451Srodrigc mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP); 138153323Srodrigc 139159451Srodrigc if (xfs_icsb_init_counters(mp)) { 140159451Srodrigc mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; 141159451Srodrigc } 142159451Srodrigc 143153323Srodrigc AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail"); 144153323Srodrigc spinlock_init(&mp->m_sb_lock, "xfs_sb"); 145159451Srodrigc /* FreeBSD specfic */ 146159451Srodrigc sx_init(&mp->m_ilock, "xfs_mnt"); 147153323Srodrigc initnsema(&mp->m_growlock, 1, "xfs_grow"); 148153323Srodrigc /* 149153323Srodrigc * Initialize the AIL. 150153323Srodrigc */ 151153323Srodrigc xfs_trans_ail_init(mp); 152153323Srodrigc 153153323Srodrigc atomic_set(&mp->m_active_trans, 0); 154153323Srodrigc 155153323Srodrigc return mp; 156153323Srodrigc} 157153323Srodrigc 158153323Srodrigc/* 159153323Srodrigc * Free up the resources associated with a mount structure. Assume that 160153323Srodrigc * the structure was initially zeroed, so we can tell which fields got 161153323Srodrigc * initialized. 
162153323Srodrigc */ 163153323Srodrigcvoid 164153323Srodrigcxfs_mount_free( 165159451Srodrigc xfs_mount_t *mp, 166159451Srodrigc int remove_bhv) 167153323Srodrigc{ 168153323Srodrigc if (mp->m_ihash) 169153323Srodrigc xfs_ihash_free(mp); 170153323Srodrigc if (mp->m_chash) 171153323Srodrigc xfs_chash_free(mp); 172153323Srodrigc 173153323Srodrigc if (mp->m_perag) { 174153323Srodrigc int agno; 175153323Srodrigc 176159451Srodrigc for (agno = 0; agno < mp->m_maxagi; agno++) 177153323Srodrigc if (mp->m_perag[agno].pagb_list) 178153323Srodrigc kmem_free(mp->m_perag[agno].pagb_list, 179153323Srodrigc sizeof(xfs_perag_busy_t) * 180153323Srodrigc XFS_PAGB_NUM_SLOTS); 181153323Srodrigc kmem_free(mp->m_perag, 182153323Srodrigc sizeof(xfs_perag_t) * mp->m_sb.sb_agcount); 183153323Srodrigc } 184153323Srodrigc 185153323Srodrigc AIL_LOCK_DESTROY(&mp->m_ail_lock); 186153323Srodrigc spinlock_destroy(&mp->m_sb_lock); 187159451Srodrigc /* FreeBSD specfic */ 188159451Srodrigc sx_destroy(&mp->m_ilock); 189153323Srodrigc freesema(&mp->m_growlock); 190153323Srodrigc if (mp->m_quotainfo) 191153323Srodrigc XFS_QM_DONE(mp); 192153323Srodrigc 193153323Srodrigc if (mp->m_fsname != NULL) 194153323Srodrigc kmem_free(mp->m_fsname, mp->m_fsname_len); 195159451Srodrigc if (mp->m_rtname != NULL) 196159451Srodrigc kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1); 197159451Srodrigc if (mp->m_logname != NULL) 198159451Srodrigc kmem_free(mp->m_logname, strlen(mp->m_logname) + 1); 199153323Srodrigc 200153323Srodrigc if (remove_bhv) { 201159451Srodrigc xfs_vfs_t *vfsp = XFS_MTOVFS(mp); 202153323Srodrigc 203153323Srodrigc bhv_remove_all_vfsops(vfsp, 0); 204153323Srodrigc VFS_REMOVEBHV(vfsp, &mp->m_bhv); 205153323Srodrigc } 206153323Srodrigc 207159451Srodrigc xfs_icsb_destroy_counters(mp); 208153323Srodrigc kmem_free(mp, sizeof(xfs_mount_t)); 209153323Srodrigc} 210153323Srodrigc 211153323Srodrigc 212153323Srodrigc/* 213153323Srodrigc * Check the validity of the SB found. 
214153323Srodrigc */ 215153323SrodrigcSTATIC int 216153323Srodrigcxfs_mount_validate_sb( 217153323Srodrigc xfs_mount_t *mp, 218159451Srodrigc xfs_sb_t *sbp, 219159451Srodrigc int flags) 220153323Srodrigc{ 221153323Srodrigc /* 222153323Srodrigc * If the log device and data device have the 223153323Srodrigc * same device number, the log is internal. 224153323Srodrigc * Consequently, the sb_logstart should be non-zero. If 225153323Srodrigc * we have a zero sb_logstart in this case, we may be trying to mount 226153323Srodrigc * a volume filesystem in a non-volume manner. 227153323Srodrigc */ 228153323Srodrigc if (sbp->sb_magicnum != XFS_SB_MAGIC) { 229159451Srodrigc xfs_fs_mount_cmn_err(flags, "bad magic number"); 230153323Srodrigc return XFS_ERROR(EWRONGFS); 231153323Srodrigc } 232153323Srodrigc 233153323Srodrigc if (!XFS_SB_GOOD_VERSION(sbp)) { 234159451Srodrigc xfs_fs_mount_cmn_err(flags, "bad version"); 235153323Srodrigc return XFS_ERROR(EWRONGFS); 236153323Srodrigc } 237153323Srodrigc 238153323Srodrigc if (unlikely( 239153323Srodrigc sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { 240159451Srodrigc xfs_fs_mount_cmn_err(flags, 241159451Srodrigc "filesystem is marked as having an external log; " 242159451Srodrigc "specify logdev on the\nmount command line."); 243159451Srodrigc return XFS_ERROR(EINVAL); 244153323Srodrigc } 245153323Srodrigc 246153323Srodrigc if (unlikely( 247153323Srodrigc sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { 248159451Srodrigc xfs_fs_mount_cmn_err(flags, 249159451Srodrigc "filesystem is marked as having an internal log; " 250159451Srodrigc "do not specify logdev on\nthe mount command line."); 251159451Srodrigc return XFS_ERROR(EINVAL); 252153323Srodrigc } 253153323Srodrigc 254153323Srodrigc /* 255153323Srodrigc * More sanity checking. These were stolen directly from 256153323Srodrigc * xfs_repair. 
257153323Srodrigc */ 258153323Srodrigc if (unlikely( 259153323Srodrigc sbp->sb_agcount <= 0 || 260153323Srodrigc sbp->sb_sectsize < XFS_MIN_SECTORSIZE || 261153323Srodrigc sbp->sb_sectsize > XFS_MAX_SECTORSIZE || 262153323Srodrigc sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || 263153323Srodrigc sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || 264153323Srodrigc sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || 265153323Srodrigc sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || 266153323Srodrigc sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || 267153323Srodrigc sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || 268153323Srodrigc sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || 269153323Srodrigc sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || 270159451Srodrigc sbp->sb_inodelog < XFS_DINODE_MIN_LOG || 271159451Srodrigc sbp->sb_inodelog > XFS_DINODE_MAX_LOG || 272159451Srodrigc (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 273153323Srodrigc (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 274153323Srodrigc (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 275159451Srodrigc (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { 276159451Srodrigc xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed"); 277153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 278153323Srodrigc } 279153323Srodrigc 280153323Srodrigc /* 281153323Srodrigc * Sanity check AG count, size fields against data size field 282153323Srodrigc */ 283153323Srodrigc if (unlikely( 284153323Srodrigc sbp->sb_dblocks == 0 || 285153323Srodrigc sbp->sb_dblocks > 286153323Srodrigc (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || 287153323Srodrigc sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * 288153323Srodrigc sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { 289159451Srodrigc xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed"); 290153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 291153323Srodrigc } 292153323Srodrigc 293159451Srodrigc ASSERT(PAGE_SHIFT >= sbp->sb_blocklog); 294159451Srodrigc ASSERT(sbp->sb_blocklog 
>= BBSHIFT); 295159451Srodrigc 296159451Srodrigc#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */ 297153323Srodrigc if (unlikely( 298159451Srodrigc (sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX || 299159451Srodrigc (sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) { 300159451Srodrigc#else /* Limited by UINT_MAX of sectors */ 301159451Srodrigc if (unlikely( 302159451Srodrigc (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX || 303159451Srodrigc (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) { 304159451Srodrigc#endif 305159451Srodrigc xfs_fs_mount_cmn_err(flags, 306159451Srodrigc "file system too large to be mounted on this system."); 307153323Srodrigc return XFS_ERROR(E2BIG); 308153323Srodrigc } 309153323Srodrigc 310153323Srodrigc if (unlikely(sbp->sb_inprogress)) { 311159451Srodrigc xfs_fs_mount_cmn_err(flags, "file system busy"); 312153323Srodrigc return XFS_ERROR(EFSCORRUPTED); 313153323Srodrigc } 314153323Srodrigc 315153323Srodrigc /* 316159451Srodrigc * Version 1 directory format has never worked on Linux. 317159451Srodrigc */ 318159451Srodrigc if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) { 319159451Srodrigc xfs_fs_mount_cmn_err(flags, 320159451Srodrigc "file system using version 1 directory format"); 321159451Srodrigc return XFS_ERROR(ENOSYS); 322159451Srodrigc } 323159451Srodrigc 324159451Srodrigc /* 325153323Srodrigc * Until this is fixed only page-sized or smaller data blocks work. 
326153323Srodrigc */ 327153323Srodrigc if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { 328159451Srodrigc xfs_fs_mount_cmn_err(flags, 329159451Srodrigc "file system with blocksize %d bytes", 330153323Srodrigc sbp->sb_blocksize); 331159451Srodrigc xfs_fs_mount_cmn_err(flags, 332159451Srodrigc "only pagesize (%ld) or less will currently work.", 333153323Srodrigc PAGE_SIZE); 334153323Srodrigc return XFS_ERROR(ENOSYS); 335153323Srodrigc } 336153323Srodrigc 337153323Srodrigc return 0; 338153323Srodrigc} 339153323Srodrigc 340159451Srodrigcxfs_agnumber_t 341159451Srodrigcxfs_initialize_perag( 342159451Srodrigc struct xfs_vfs *vfs, 343159451Srodrigc xfs_mount_t *mp, 344159451Srodrigc xfs_agnumber_t agcount) 345153323Srodrigc{ 346159451Srodrigc xfs_agnumber_t index, max_metadata; 347153323Srodrigc xfs_perag_t *pag; 348153323Srodrigc xfs_agino_t agino; 349153323Srodrigc xfs_ino_t ino; 350153323Srodrigc xfs_sb_t *sbp = &mp->m_sb; 351153323Srodrigc xfs_ino_t max_inum = XFS_MAXINUMBER_32; 352153323Srodrigc 353153323Srodrigc /* Check to see if the filesystem can overflow 32 bit inodes */ 354153323Srodrigc agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); 355153323Srodrigc ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); 356153323Srodrigc 357153323Srodrigc /* Clear the mount flag if no inode can overflow 32 bits 358153323Srodrigc * on this filesystem, or if specifically requested.. 359153323Srodrigc */ 360159451Srodrigc if ((vfs->vfs_flag & VFS_32BITINODES) && ino > max_inum) { 361153323Srodrigc mp->m_flags |= XFS_MOUNT_32BITINODES; 362153323Srodrigc } else { 363153323Srodrigc mp->m_flags &= ~XFS_MOUNT_32BITINODES; 364153323Srodrigc } 365153323Srodrigc 366153323Srodrigc /* If we can overflow then setup the ag headers accordingly */ 367153323Srodrigc if (mp->m_flags & XFS_MOUNT_32BITINODES) { 368153323Srodrigc /* Calculate how much should be reserved for inodes to 369153323Srodrigc * meet the max inode percentage. 
370153323Srodrigc */ 371153323Srodrigc if (mp->m_maxicount) { 372153323Srodrigc __uint64_t icount; 373153323Srodrigc 374153323Srodrigc icount = sbp->sb_dblocks * sbp->sb_imax_pct; 375153323Srodrigc do_div(icount, 100); 376153323Srodrigc icount += sbp->sb_agblocks - 1; 377159451Srodrigc do_div(icount, sbp->sb_agblocks); 378153323Srodrigc max_metadata = icount; 379153323Srodrigc } else { 380153323Srodrigc max_metadata = agcount; 381153323Srodrigc } 382153323Srodrigc for (index = 0; index < agcount; index++) { 383153323Srodrigc ino = XFS_AGINO_TO_INO(mp, index, agino); 384153323Srodrigc if (ino > max_inum) { 385153323Srodrigc index++; 386153323Srodrigc break; 387153323Srodrigc } 388153323Srodrigc 389159451Srodrigc /* This ag is preferred for inodes */ 390153323Srodrigc pag = &mp->m_perag[index]; 391153323Srodrigc pag->pagi_inodeok = 1; 392153323Srodrigc if (index < max_metadata) 393153323Srodrigc pag->pagf_metadata = 1; 394153323Srodrigc } 395153323Srodrigc } else { 396153323Srodrigc /* Setup default behavior for smaller filesystems */ 397153323Srodrigc for (index = 0; index < agcount; index++) { 398153323Srodrigc pag = &mp->m_perag[index]; 399153323Srodrigc pag->pagi_inodeok = 1; 400153323Srodrigc } 401153323Srodrigc } 402159451Srodrigc return index; 403153323Srodrigc} 404153323Srodrigc 405153323Srodrigc/* 406153323Srodrigc * xfs_xlatesb 407153323Srodrigc * 408153323Srodrigc * data - on disk version of sb 409153323Srodrigc * sb - a superblock 410153323Srodrigc * dir - conversion direction: <0 - convert sb to buf 411153323Srodrigc * >0 - convert buf to sb 412153323Srodrigc * fields - which fields to copy (bitmask) 413153323Srodrigc */ 414153323Srodrigcvoid 415153323Srodrigcxfs_xlatesb( 416153323Srodrigc void *data, 417153323Srodrigc xfs_sb_t *sb, 418153323Srodrigc int dir, 419153323Srodrigc __int64_t fields) 420153323Srodrigc{ 421153323Srodrigc xfs_caddr_t buf_ptr; 422153323Srodrigc xfs_caddr_t mem_ptr; 423153323Srodrigc xfs_sb_field_t f; 424153323Srodrigc int first; 
425153323Srodrigc int size; 426153323Srodrigc 427153323Srodrigc ASSERT(dir); 428153323Srodrigc ASSERT(fields); 429153323Srodrigc 430153323Srodrigc if (!fields) 431153323Srodrigc return; 432153323Srodrigc 433153323Srodrigc buf_ptr = (xfs_caddr_t)data; 434153323Srodrigc mem_ptr = (xfs_caddr_t)sb; 435153323Srodrigc 436153323Srodrigc while (fields) { 437153323Srodrigc f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); 438153323Srodrigc first = xfs_sb_info[f].offset; 439153323Srodrigc size = xfs_sb_info[f + 1].offset - first; 440153323Srodrigc 441153323Srodrigc ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1); 442153323Srodrigc 443159451Srodrigc if (size == 1 || xfs_sb_info[f].type == 1) { 444153323Srodrigc if (dir > 0) { 445153323Srodrigc memcpy(mem_ptr + first, buf_ptr + first, size); 446153323Srodrigc } else { 447153323Srodrigc memcpy(buf_ptr + first, mem_ptr + first, size); 448153323Srodrigc } 449153323Srodrigc } else { 450153323Srodrigc switch (size) { 451153323Srodrigc case 2: 452153323Srodrigc INT_XLATE(*(__uint16_t*)(buf_ptr+first), 453153323Srodrigc *(__uint16_t*)(mem_ptr+first), 454159451Srodrigc dir, ARCH_CONVERT); 455153323Srodrigc break; 456153323Srodrigc case 4: 457153323Srodrigc INT_XLATE(*(__uint32_t*)(buf_ptr+first), 458153323Srodrigc *(__uint32_t*)(mem_ptr+first), 459159451Srodrigc dir, ARCH_CONVERT); 460153323Srodrigc break; 461153323Srodrigc case 8: 462153323Srodrigc INT_XLATE(*(__uint64_t*)(buf_ptr+first), 463159451Srodrigc *(__uint64_t*)(mem_ptr+first), dir, ARCH_CONVERT); 464153323Srodrigc break; 465153323Srodrigc default: 466153323Srodrigc ASSERT(0); 467153323Srodrigc } 468153323Srodrigc } 469153323Srodrigc 470153323Srodrigc fields &= ~(1LL << f); 471153323Srodrigc } 472153323Srodrigc} 473153323Srodrigc 474153323Srodrigc/* 475153323Srodrigc * xfs_readsb 476153323Srodrigc * 477153323Srodrigc * Does the initial read of the superblock. 
478153323Srodrigc */ 479153323Srodrigcint 480159451Srodrigcxfs_readsb(xfs_mount_t *mp, int flags) 481153323Srodrigc{ 482153323Srodrigc unsigned int sector_size; 483153323Srodrigc unsigned int extra_flags; 484153323Srodrigc xfs_buf_t *bp; 485153323Srodrigc xfs_sb_t *sbp; 486153323Srodrigc int error; 487153323Srodrigc 488153323Srodrigc ASSERT(mp->m_sb_bp == NULL); 489153323Srodrigc ASSERT(mp->m_ddev_targp != NULL); 490153323Srodrigc 491153323Srodrigc /* 492153323Srodrigc * Allocate a (locked) buffer to hold the superblock. 493153323Srodrigc * This will be kept around at all times to optimize 494153323Srodrigc * access to the superblock. 495153323Srodrigc */ 496153323Srodrigc sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); 497159451Srodrigc extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; 498153323Srodrigc 499159451Srodrigc bp = xfs_getsb(mp,0); 500159451Srodrigc 501153323Srodrigc if (!bp || XFS_BUF_ISERROR(bp)) { 502159451Srodrigc xfs_fs_mount_cmn_err(flags, "SB read failed"); 503153323Srodrigc error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 504153323Srodrigc goto fail; 505153323Srodrigc } 506153323Srodrigc ASSERT(XFS_BUF_ISBUSY(bp)); 507153323Srodrigc ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 508153323Srodrigc 509153323Srodrigc /* 510153323Srodrigc * Initialize the mount structure from the superblock. 511153323Srodrigc * But first do some basic consistency checking. 512153323Srodrigc */ 513153323Srodrigc sbp = XFS_BUF_TO_SBP(bp); 514159451Srodrigc xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS); 515153323Srodrigc 516159451Srodrigc error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); 517153323Srodrigc if (error) { 518159451Srodrigc xfs_fs_mount_cmn_err(flags, "SB validate failed"); 519153323Srodrigc goto fail; 520153323Srodrigc } 521153323Srodrigc 522153323Srodrigc /* 523153323Srodrigc * We must be able to do sector-sized and sector-aligned IO. 
524153323Srodrigc */ 525153323Srodrigc if (sector_size > mp->m_sb.sb_sectsize) { 526159451Srodrigc xfs_fs_mount_cmn_err(flags, 527159451Srodrigc "device supports only %u byte sectors (not %u)", 528153323Srodrigc sector_size, mp->m_sb.sb_sectsize); 529153323Srodrigc error = ENOSYS; 530153323Srodrigc goto fail; 531153323Srodrigc } 532153323Srodrigc 533153323Srodrigc /* 534153323Srodrigc * If device sector size is smaller than the superblock size, 535153323Srodrigc * re-read the superblock so the buffer is correctly sized. 536153323Srodrigc */ 537153323Srodrigc if (sector_size < mp->m_sb.sb_sectsize) { 538153323Srodrigc XFS_BUF_UNMANAGE(bp); 539153323Srodrigc xfs_buf_relse(bp); 540153323Srodrigc sector_size = mp->m_sb.sb_sectsize; 541153323Srodrigc bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, 542153323Srodrigc BTOBB(sector_size), extra_flags); 543153323Srodrigc if (!bp || XFS_BUF_ISERROR(bp)) { 544159451Srodrigc xfs_fs_mount_cmn_err(flags, "SB re-read failed"); 545153323Srodrigc error = bp ? 
XFS_BUF_GETERROR(bp) : ENOMEM; 546153323Srodrigc goto fail; 547153323Srodrigc } 548153323Srodrigc ASSERT(XFS_BUF_ISBUSY(bp)); 549153323Srodrigc ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 550153323Srodrigc } 551153323Srodrigc 552159451Srodrigc xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); 553159451Srodrigc xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); 554159451Srodrigc xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); 555159451Srodrigc 556153323Srodrigc mp->m_sb_bp = bp; 557153323Srodrigc xfs_buf_relse(bp); 558153323Srodrigc ASSERT(XFS_BUF_VALUSEMA(bp) > 0); 559153323Srodrigc return 0; 560153323Srodrigc 561153323Srodrigc fail: 562153323Srodrigc if (bp) { 563153323Srodrigc XFS_BUF_UNMANAGE(bp); 564153323Srodrigc xfs_buf_relse(bp); 565153323Srodrigc } 566153323Srodrigc return error; 567153323Srodrigc} 568153323Srodrigc 569153323Srodrigc 570153323Srodrigc/* 571153323Srodrigc * xfs_mount_common 572153323Srodrigc * 573153323Srodrigc * Mount initialization code establishing various mount 574153323Srodrigc * fields from the superblock associated with the given 575153323Srodrigc * mount structure 576153323Srodrigc */ 577153323SrodrigcSTATIC void 578153323Srodrigcxfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) 579153323Srodrigc{ 580153323Srodrigc int i; 581153323Srodrigc 582153323Srodrigc mp->m_agfrotor = mp->m_agirotor = 0; 583159451Srodrigc spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock"); 584153323Srodrigc mp->m_maxagi = mp->m_sb.sb_agcount; 585153323Srodrigc mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; 586153323Srodrigc mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; 587153323Srodrigc mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; 588153323Srodrigc mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; 589153323Srodrigc mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; 590153323Srodrigc mp->m_litino = sbp->sb_inodesize - 591153323Srodrigc ((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t)); 592153323Srodrigc mp->m_blockmask = sbp->sb_blocksize - 
1; 593153323Srodrigc mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; 594153323Srodrigc mp->m_blockwmask = mp->m_blockwsize - 1; 595159451Srodrigc#ifdef RMC 596159451Srodrigc INIT_LIST_HEAD(&mp->m_del_inodes); 597159451Srodrigc#endif 598153323Srodrigc TAILQ_INIT(&mp->m_del_inodes); 599153323Srodrigc 600153323Srodrigc /* 601153323Srodrigc * Setup for attributes, in case they get created. 602153323Srodrigc * This value is for inodes getting attributes for the first time, 603153323Srodrigc * the per-inode value is for old attribute values. 604153323Srodrigc */ 605153323Srodrigc ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048); 606153323Srodrigc switch (sbp->sb_inodesize) { 607153323Srodrigc case 256: 608159451Srodrigc mp->m_attroffset = XFS_LITINO(mp) - 609159451Srodrigc XFS_BMDR_SPACE_CALC(MINABTPTRS); 610153323Srodrigc break; 611153323Srodrigc case 512: 612153323Srodrigc case 1024: 613153323Srodrigc case 2048: 614159451Srodrigc mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); 615153323Srodrigc break; 616153323Srodrigc default: 617153323Srodrigc ASSERT(0); 618153323Srodrigc } 619153323Srodrigc ASSERT(mp->m_attroffset < XFS_LITINO(mp)); 620153323Srodrigc 621153323Srodrigc for (i = 0; i < 2; i++) { 622153323Srodrigc mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 623153323Srodrigc xfs_alloc, i == 0); 624153323Srodrigc mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 625153323Srodrigc xfs_alloc, i == 0); 626153323Srodrigc } 627153323Srodrigc for (i = 0; i < 2; i++) { 628153323Srodrigc mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 629153323Srodrigc xfs_bmbt, i == 0); 630153323Srodrigc mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 631153323Srodrigc xfs_bmbt, i == 0); 632153323Srodrigc } 633153323Srodrigc for (i = 0; i < 2; i++) { 634153323Srodrigc mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 635153323Srodrigc xfs_inobt, i == 0); 636153323Srodrigc 
mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 637153323Srodrigc xfs_inobt, i == 0); 638153323Srodrigc } 639153323Srodrigc 640153323Srodrigc mp->m_bsize = XFS_FSB_TO_BB(mp, 1); 641153323Srodrigc mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, 642153323Srodrigc sbp->sb_inopblock); 643153323Srodrigc mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; 644153323Srodrigc} 645153323Srodrigc/* 646153323Srodrigc * xfs_mountfs 647153323Srodrigc * 648153323Srodrigc * This function does the following on an initial mount of a file system: 649153323Srodrigc * - reads the superblock from disk and init the mount struct 650153323Srodrigc * - if we're a 32-bit kernel, do a size check on the superblock 651153323Srodrigc * so we don't mount terabyte filesystems 652153323Srodrigc * - init mount struct realtime fields 653153323Srodrigc * - allocate inode hash table for fs 654153323Srodrigc * - init directory manager 655153323Srodrigc * - perform recovery and init the log manager 656153323Srodrigc */ 657153323Srodrigcint 658153323Srodrigcxfs_mountfs( 659153323Srodrigc xfs_vfs_t *vfsp, 660153323Srodrigc xfs_mount_t *mp, 661153323Srodrigc int mfsi_flags) 662153323Srodrigc{ 663153323Srodrigc xfs_buf_t *bp; 664153323Srodrigc xfs_sb_t *sbp = &(mp->m_sb); 665153323Srodrigc xfs_inode_t *rip; 666159451Srodrigc xfs_vnode_t *rvp = NULL; 667153323Srodrigc int readio_log, writeio_log; 668153323Srodrigc xfs_daddr_t d; 669153323Srodrigc __uint64_t ret64; 670153323Srodrigc __int64_t update_flags; 671153323Srodrigc uint quotamount, quotaflags; 672153323Srodrigc int agno; 673153323Srodrigc int uuid_mounted = 0; 674153323Srodrigc int error = 0; 675153323Srodrigc 676153323Srodrigc if (mp->m_sb_bp == NULL) { 677159451Srodrigc if ((error = xfs_readsb(mp, mfsi_flags))) { 678159451Srodrigc return error; 679153323Srodrigc } 680153323Srodrigc } 681153323Srodrigc xfs_mount_common(mp, sbp); 682153323Srodrigc 683153323Srodrigc /* 684153323Srodrigc * Check if sb_agblocks is 
aligned at stripe boundary 685153323Srodrigc * If sb_agblocks is NOT aligned turn off m_dalign since 686153323Srodrigc * allocator alignment is within an ag, therefore ag has 687153323Srodrigc * to be aligned at stripe boundary. 688153323Srodrigc */ 689153323Srodrigc update_flags = 0LL; 690153323Srodrigc if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) { 691153323Srodrigc /* 692153323Srodrigc * If stripe unit and stripe width are not multiples 693153323Srodrigc * of the fs blocksize turn off alignment. 694153323Srodrigc */ 695153323Srodrigc if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || 696153323Srodrigc (BBTOB(mp->m_swidth) & mp->m_blockmask)) { 697153323Srodrigc if (mp->m_flags & XFS_MOUNT_RETERR) { 698153323Srodrigc cmn_err(CE_WARN, 699153323Srodrigc "XFS: alignment check 1 failed"); 700153323Srodrigc error = XFS_ERROR(EINVAL); 701153323Srodrigc goto error1; 702153323Srodrigc } 703153323Srodrigc mp->m_dalign = mp->m_swidth = 0; 704153323Srodrigc } else { 705153323Srodrigc /* 706153323Srodrigc * Convert the stripe unit and width to FSBs. 
707153323Srodrigc */ 708153323Srodrigc mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); 709153323Srodrigc if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { 710153323Srodrigc if (mp->m_flags & XFS_MOUNT_RETERR) { 711153323Srodrigc error = XFS_ERROR(EINVAL); 712153323Srodrigc goto error1; 713153323Srodrigc } 714159451Srodrigc xfs_fs_cmn_err(CE_WARN, mp, 715159451Srodrigc"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)", 716159451Srodrigc mp->m_dalign, mp->m_swidth, 717159451Srodrigc sbp->sb_agblocks); 718159451Srodrigc 719153323Srodrigc mp->m_dalign = 0; 720153323Srodrigc mp->m_swidth = 0; 721153323Srodrigc } else if (mp->m_dalign) { 722153323Srodrigc mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); 723153323Srodrigc } else { 724153323Srodrigc if (mp->m_flags & XFS_MOUNT_RETERR) { 725159451Srodrigc xfs_fs_cmn_err(CE_WARN, mp, 726159451Srodrigc"stripe alignment turned off: sunit(%d) less than bsize(%d)", 727159451Srodrigc mp->m_dalign, 728159451Srodrigc mp->m_blockmask +1); 729153323Srodrigc error = XFS_ERROR(EINVAL); 730153323Srodrigc goto error1; 731153323Srodrigc } 732153323Srodrigc mp->m_swidth = 0; 733153323Srodrigc } 734153323Srodrigc } 735153323Srodrigc 736153323Srodrigc /* 737153323Srodrigc * Update superblock with new values 738153323Srodrigc * and log changes 739153323Srodrigc */ 740153323Srodrigc if (XFS_SB_VERSION_HASDALIGN(sbp)) { 741153323Srodrigc if (sbp->sb_unit != mp->m_dalign) { 742153323Srodrigc sbp->sb_unit = mp->m_dalign; 743153323Srodrigc update_flags |= XFS_SB_UNIT; 744153323Srodrigc } 745153323Srodrigc if (sbp->sb_width != mp->m_swidth) { 746153323Srodrigc sbp->sb_width = mp->m_swidth; 747153323Srodrigc update_flags |= XFS_SB_WIDTH; 748153323Srodrigc } 749153323Srodrigc } 750153323Srodrigc } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && 751153323Srodrigc XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) { 752153323Srodrigc mp->m_dalign = sbp->sb_unit; 753153323Srodrigc mp->m_swidth = sbp->sb_width; 
754153323Srodrigc } 755153323Srodrigc 756153323Srodrigc xfs_alloc_compute_maxlevels(mp); 757153323Srodrigc xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); 758153323Srodrigc xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); 759153323Srodrigc xfs_ialloc_compute_maxlevels(mp); 760153323Srodrigc 761153323Srodrigc if (sbp->sb_imax_pct) { 762153323Srodrigc __uint64_t icount; 763153323Srodrigc 764153323Srodrigc /* Make sure the maximum inode count is a multiple of the 765153323Srodrigc * units we allocate inodes in. 766153323Srodrigc */ 767153323Srodrigc 768153323Srodrigc icount = sbp->sb_dblocks * sbp->sb_imax_pct; 769153323Srodrigc do_div(icount, 100); 770153323Srodrigc do_div(icount, mp->m_ialloc_blks); 771153323Srodrigc mp->m_maxicount = (icount * mp->m_ialloc_blks) << 772153323Srodrigc sbp->sb_inopblog; 773153323Srodrigc } else 774153323Srodrigc mp->m_maxicount = 0; 775153323Srodrigc 776153323Srodrigc mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog); 777153323Srodrigc 778153323Srodrigc /* 779153323Srodrigc * XFS uses the uuid from the superblock as the unique 780153323Srodrigc * identifier for fsid. We can not use the uuid from the volume 781153323Srodrigc * since a single partition filesystem is identical to a single 782153323Srodrigc * partition volume/filesystem. 783153323Srodrigc */ 784153323Srodrigc if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && 785153323Srodrigc (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { 786153323Srodrigc if (xfs_uuid_mount(mp)) { 787153323Srodrigc error = XFS_ERROR(EINVAL); 788153323Srodrigc goto error1; 789153323Srodrigc } 790153323Srodrigc uuid_mounted=1; 791153323Srodrigc ret64 = uuid_hash64(&sbp->sb_uuid); 792153323Srodrigc memcpy(&vfsp->vfs_fsid, &ret64, sizeof(ret64)); 793153323Srodrigc } 794153323Srodrigc 795153323Srodrigc /* 796153323Srodrigc * Set the default minimum read and write sizes unless 797153323Srodrigc * already specified in a mount option. 
798153323Srodrigc * We use smaller I/O sizes when the file system 799153323Srodrigc * is being used for NFS service (wsync mount option). 800153323Srodrigc */ 801153323Srodrigc if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { 802153323Srodrigc if (mp->m_flags & XFS_MOUNT_WSYNC) { 803153323Srodrigc readio_log = XFS_WSYNC_READIO_LOG; 804153323Srodrigc writeio_log = XFS_WSYNC_WRITEIO_LOG; 805153323Srodrigc } else { 806153323Srodrigc readio_log = XFS_READIO_LOG_LARGE; 807153323Srodrigc writeio_log = XFS_WRITEIO_LOG_LARGE; 808153323Srodrigc } 809153323Srodrigc } else { 810153323Srodrigc readio_log = mp->m_readio_log; 811153323Srodrigc writeio_log = mp->m_writeio_log; 812153323Srodrigc } 813153323Srodrigc 814153323Srodrigc /* 815153323Srodrigc * Set the number of readahead buffers to use based on 816153323Srodrigc * physical memory size. 817153323Srodrigc */ 818153323Srodrigc if (xfs_physmem <= 4096) /* <= 16MB */ 819153323Srodrigc mp->m_nreadaheads = XFS_RW_NREADAHEAD_16MB; 820153323Srodrigc else if (xfs_physmem <= 8192) /* <= 32MB */ 821153323Srodrigc mp->m_nreadaheads = XFS_RW_NREADAHEAD_32MB; 822153323Srodrigc else 823153323Srodrigc mp->m_nreadaheads = XFS_RW_NREADAHEAD_K32; 824153323Srodrigc if (sbp->sb_blocklog > readio_log) { 825153323Srodrigc mp->m_readio_log = sbp->sb_blocklog; 826153323Srodrigc } else { 827153323Srodrigc mp->m_readio_log = readio_log; 828153323Srodrigc } 829153323Srodrigc mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog); 830153323Srodrigc if (sbp->sb_blocklog > writeio_log) { 831153323Srodrigc mp->m_writeio_log = sbp->sb_blocklog; 832153323Srodrigc } else { 833153323Srodrigc mp->m_writeio_log = writeio_log; 834153323Srodrigc } 835153323Srodrigc mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog); 836153323Srodrigc 837153323Srodrigc /* 838153323Srodrigc * Set the inode cluster size based on the physical memory 839153323Srodrigc * size. 
This may still be overridden by the file system 840153323Srodrigc * block size if it is larger than the chosen cluster size. 841153323Srodrigc */ 842153323Srodrigc if (xfs_physmem <= btoc(32 * 1024 * 1024)) { /* <= 32 MB */ 843153323Srodrigc mp->m_inode_cluster_size = XFS_INODE_SMALL_CLUSTER_SIZE; 844153323Srodrigc } else { 845153323Srodrigc mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; 846153323Srodrigc } 847153323Srodrigc /* 848153323Srodrigc * Set whether we're using inode alignment. 849153323Srodrigc */ 850153323Srodrigc if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) && 851153323Srodrigc mp->m_sb.sb_inoalignmt >= 852153323Srodrigc XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) 853153323Srodrigc mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; 854153323Srodrigc else 855153323Srodrigc mp->m_inoalign_mask = 0; 856153323Srodrigc /* 857153323Srodrigc * If we are using stripe alignment, check whether 858153323Srodrigc * the stripe unit is a multiple of the inode alignment 859153323Srodrigc */ 860153323Srodrigc if (mp->m_dalign && mp->m_inoalign_mask && 861153323Srodrigc !(mp->m_dalign & mp->m_inoalign_mask)) 862153323Srodrigc mp->m_sinoalign = mp->m_dalign; 863153323Srodrigc else 864153323Srodrigc mp->m_sinoalign = 0; 865153323Srodrigc /* 866153323Srodrigc * Check that the data (and log if separate) are an ok size. 
867153323Srodrigc */ 868153323Srodrigc d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 869153323Srodrigc if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { 870153323Srodrigc cmn_err(CE_WARN, "XFS: size check 1 failed"); 871153323Srodrigc error = XFS_ERROR(E2BIG); 872153323Srodrigc goto error1; 873153323Srodrigc } 874153323Srodrigc error = xfs_read_buf(mp, mp->m_ddev_targp, 875153323Srodrigc d - XFS_FSS_TO_BB(mp, 1), 876153323Srodrigc XFS_FSS_TO_BB(mp, 1), 0, &bp); 877153323Srodrigc if (!error) { 878153323Srodrigc xfs_buf_relse(bp); 879153323Srodrigc } else { 880153323Srodrigc cmn_err(CE_WARN, "XFS: size check 2 failed"); 881153323Srodrigc if (error == ENOSPC) { 882153323Srodrigc error = XFS_ERROR(E2BIG); 883153323Srodrigc } 884153323Srodrigc goto error1; 885153323Srodrigc } 886153323Srodrigc 887153323Srodrigc if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) && 888153323Srodrigc mp->m_logdev_targp != mp->m_ddev_targp) { 889153323Srodrigc d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 890153323Srodrigc if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { 891153323Srodrigc cmn_err(CE_WARN, "XFS: size check 3 failed"); 892153323Srodrigc error = XFS_ERROR(E2BIG); 893153323Srodrigc goto error1; 894153323Srodrigc } 895153323Srodrigc error = xfs_read_buf(mp, mp->m_logdev_targp, 896153323Srodrigc d - XFS_FSB_TO_BB(mp, 1), 897153323Srodrigc XFS_FSB_TO_BB(mp, 1), 0, &bp); 898153323Srodrigc if (!error) { 899153323Srodrigc xfs_buf_relse(bp); 900153323Srodrigc } else { 901153323Srodrigc cmn_err(CE_WARN, "XFS: size check 3 failed"); 902153323Srodrigc if (error == ENOSPC) { 903153323Srodrigc error = XFS_ERROR(E2BIG); 904153323Srodrigc } 905153323Srodrigc goto error1; 906153323Srodrigc } 907153323Srodrigc } 908153323Srodrigc 909153323Srodrigc /* 910153323Srodrigc * Initialize realtime fields in the mount structure 911153323Srodrigc */ 912153323Srodrigc if ((error = xfs_rtmount_init(mp))) { 913153323Srodrigc cmn_err(CE_WARN, "XFS: RT mount failed"); 914153323Srodrigc 
goto error1; 915153323Srodrigc } 916153323Srodrigc 917153323Srodrigc /* 918153323Srodrigc * For client case we are done now 919153323Srodrigc */ 920153323Srodrigc if (mfsi_flags & XFS_MFSI_CLIENT) { 921159451Srodrigc return 0; 922153323Srodrigc } 923153323Srodrigc 924153323Srodrigc /* 925153323Srodrigc * Copies the low order bits of the timestamp and the randomly 926153323Srodrigc * set "sequence" number out of a UUID. 927153323Srodrigc */ 928153323Srodrigc uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid); 929153323Srodrigc 930153323Srodrigc /* 931153323Srodrigc * The vfs structure needs to have a file system independent 932153323Srodrigc * way of checking for the invariant file system ID. Since it 933153323Srodrigc * can't look at mount structures it has a pointer to the data 934153323Srodrigc * in the mount structure. 935153323Srodrigc * 936153323Srodrigc * File systems that don't support user level file handles (i.e. 937153323Srodrigc * all of them except for XFS) will leave vfs_altfsid as NULL. 938153323Srodrigc */ 939153323Srodrigc vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid; 940153323Srodrigc mp->m_dmevmask = 0; /* not persistent; set after each mount */ 941153323Srodrigc 942153323Srodrigc /* 943153323Srodrigc * Select the right directory manager. 944153323Srodrigc */ 945153323Srodrigc mp->m_dirops = 946153323Srodrigc XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ? 947153323Srodrigc xfsv2_dirops : 948153323Srodrigc xfsv1_dirops; 949153323Srodrigc 950153323Srodrigc /* 951153323Srodrigc * Initialize directory manager's entries. 952153323Srodrigc */ 953153323Srodrigc XFS_DIR_MOUNT(mp); 954153323Srodrigc 955153323Srodrigc /* 956153323Srodrigc * Initialize the attribute manager's entries. 957153323Srodrigc */ 958153323Srodrigc mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100; 959153323Srodrigc 960153323Srodrigc /* 961153323Srodrigc * Initialize the precomputed transaction reservations values. 
962153323Srodrigc */ 963153323Srodrigc xfs_trans_init(mp); 964153323Srodrigc 965153323Srodrigc /* 966153323Srodrigc * Allocate and initialize the inode hash table for this 967153323Srodrigc * file system. 968153323Srodrigc */ 969153323Srodrigc xfs_ihash_init(mp); 970153323Srodrigc xfs_chash_init(mp); 971153323Srodrigc 972153323Srodrigc /* 973153323Srodrigc * Allocate and initialize the per-ag data. 974153323Srodrigc */ 975153323Srodrigc init_rwsem(&mp->m_peraglock); 976153323Srodrigc mp->m_perag = 977153323Srodrigc kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP); 978153323Srodrigc 979159451Srodrigc mp->m_maxagi = xfs_initialize_perag(vfsp, mp, sbp->sb_agcount); 980153323Srodrigc 981153323Srodrigc /* 982153323Srodrigc * log's mount-time initialization. Perform 1st part recovery if needed 983153323Srodrigc */ 984153323Srodrigc if (likely(sbp->sb_logblocks > 0)) { /* check for volume case */ 985153323Srodrigc error = xfs_log_mount(mp, mp->m_logdev_targp, 986153323Srodrigc XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), 987153323Srodrigc XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); 988153323Srodrigc if (error) { 989153323Srodrigc cmn_err(CE_WARN, "XFS: log mount failed"); 990153323Srodrigc goto error2; 991153323Srodrigc } 992153323Srodrigc } else { /* No log has been defined */ 993153323Srodrigc cmn_err(CE_WARN, "XFS: no log defined"); 994153323Srodrigc XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp); 995153323Srodrigc error = XFS_ERROR(EFSCORRUPTED); 996153323Srodrigc goto error2; 997153323Srodrigc } 998153323Srodrigc 999153323Srodrigc /* 1000153323Srodrigc * Get and sanity-check the root inode. 1001153323Srodrigc * Save the pointer to it in the mount structure. 
1002153323Srodrigc */ 1003159451Srodrigc error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0); 1004153323Srodrigc if (error) { 1005153323Srodrigc cmn_err(CE_WARN, "XFS: failed to read root inode"); 1006153323Srodrigc goto error3; 1007153323Srodrigc } 1008153323Srodrigc 1009153323Srodrigc ASSERT(rip != NULL); 1010153323Srodrigc rvp = XFS_ITOV(rip); 1011153323Srodrigc 1012153323Srodrigc if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { 1013153323Srodrigc cmn_err(CE_WARN, "XFS: corrupted root inode"); 1014159451Srodrigc printf("Root inode %p is not a directory: %llu", 1015159451Srodrigc mp->m_ddev_targp, (unsigned long long)rip->i_ino); 1016153323Srodrigc xfs_iunlock(rip, XFS_ILOCK_EXCL); 1017153323Srodrigc XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, 1018153323Srodrigc mp); 1019153323Srodrigc error = XFS_ERROR(EFSCORRUPTED); 1020153323Srodrigc goto error4; 1021153323Srodrigc } 1022153323Srodrigc mp->m_rootip = rip; /* save it */ 1023153323Srodrigc 1024153323Srodrigc xfs_iunlock(rip, XFS_ILOCK_EXCL); 1025153323Srodrigc 1026153323Srodrigc /* 1027153323Srodrigc * Initialize realtime inode pointers in the mount structure 1028153323Srodrigc */ 1029153323Srodrigc if ((error = xfs_rtmount_inodes(mp))) { 1030153323Srodrigc /* 1031153323Srodrigc * Free up the root inode. 1032153323Srodrigc */ 1033153323Srodrigc cmn_err(CE_WARN, "XFS: failed to read RT inodes"); 1034153323Srodrigc goto error4; 1035153323Srodrigc } 1036153323Srodrigc 1037153323Srodrigc /* 1038153323Srodrigc * If fs is not mounted readonly, then update the superblock 1039153323Srodrigc * unit and width changes. 
1040153323Srodrigc */ 1041153323Srodrigc if (update_flags && !(vfsp->vfs_flag & VFS_RDONLY)) 1042153323Srodrigc xfs_mount_log_sbunit(mp, update_flags); 1043153323Srodrigc 1044153323Srodrigc /* 1045153323Srodrigc * Initialise the XFS quota management subsystem for this mount 1046153323Srodrigc */ 1047153323Srodrigc if ((error = XFS_QM_INIT(mp, "amount, "aflags))) 1048153323Srodrigc goto error4; 1049153323Srodrigc 1050153323Srodrigc /* 1051153323Srodrigc * Finish recovering the file system. This part needed to be 1052153323Srodrigc * delayed until after the root and real-time bitmap inodes 1053153323Srodrigc * were consistently read in. 1054153323Srodrigc */ 1055153323Srodrigc error = xfs_log_mount_finish(mp, mfsi_flags); 1056153323Srodrigc if (error) { 1057153323Srodrigc cmn_err(CE_WARN, "XFS: log mount finish failed"); 1058153323Srodrigc goto error4; 1059153323Srodrigc } 1060153323Srodrigc 1061153323Srodrigc /* 1062153323Srodrigc * Complete the quota initialisation, post-log-replay component. 1063153323Srodrigc */ 1064159451Srodrigc if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags))) 1065153323Srodrigc goto error4; 1066153323Srodrigc 1067153323Srodrigc return 0; 1068153323Srodrigc 1069153323Srodrigc error4: 1070153323Srodrigc /* 1071153323Srodrigc * Free up the root inode. 
1072153323Srodrigc */ 1073153323Srodrigc VN_RELE(rvp); 1074153323Srodrigc error3: 1075153323Srodrigc xfs_log_unmount_dealloc(mp); 1076153323Srodrigc error2: 1077153323Srodrigc xfs_ihash_free(mp); 1078153323Srodrigc xfs_chash_free(mp); 1079153323Srodrigc for (agno = 0; agno < sbp->sb_agcount; agno++) 1080153323Srodrigc if (mp->m_perag[agno].pagb_list) 1081153323Srodrigc kmem_free(mp->m_perag[agno].pagb_list, 1082153323Srodrigc sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS); 1083153323Srodrigc kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t)); 1084153323Srodrigc mp->m_perag = NULL; 1085153323Srodrigc /* FALLTHROUGH */ 1086153323Srodrigc error1: 1087153323Srodrigc if (uuid_mounted) 1088153323Srodrigc xfs_uuid_unmount(mp); 1089153323Srodrigc xfs_freesb(mp); 1090153323Srodrigc return error; 1091153323Srodrigc} 1092153323Srodrigc 1093153323Srodrigc/* 1094153323Srodrigc * xfs_unmountfs 1095153323Srodrigc * 1096153323Srodrigc * This flushes out the inodes,dquots and the superblock, unmounts the 1097153323Srodrigc * log and makes sure that incore structures are freed. 1098153323Srodrigc */ 1099153323Srodrigcint 1100153323Srodrigcxfs_unmountfs(xfs_mount_t *mp, struct cred *cr) 1101153323Srodrigc{ 1102153323Srodrigc struct xfs_vfs *vfsp = XFS_MTOVFS(mp); 1103153323Srodrigc#if defined(DEBUG) || defined(INDUCE_IO_ERROR) 1104153323Srodrigc int64_t fsid; 1105153323Srodrigc#endif 1106153323Srodrigc 1107159451Srodrigc xfs_iflush_all(mp); 1108153323Srodrigc 1109159451Srodrigc XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); 1110153323Srodrigc 1111153323Srodrigc /* 1112153323Srodrigc * Flush out the log synchronously so that we know for sure 1113153323Srodrigc * that nothing is pinned. This is important because bflush() 1114153323Srodrigc * will skip pinned buffers. 
1115153323Srodrigc */ 1116153323Srodrigc xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1117153323Srodrigc 1118153323Srodrigc xfs_binval(mp->m_ddev_targp); 1119153323Srodrigc if (mp->m_rtdev_targp) { 1120153323Srodrigc xfs_binval(mp->m_rtdev_targp); 1121153323Srodrigc } 1122153323Srodrigc 1123153323Srodrigc xfs_unmountfs_writesb(mp); 1124153323Srodrigc 1125159451Srodrigc xfs_unmountfs_wait(mp); /* wait for async bufs */ 1126159451Srodrigc 1127153323Srodrigc xfs_log_unmount(mp); /* Done! No more fs ops. */ 1128153323Srodrigc 1129153323Srodrigc xfs_freesb(mp); 1130153323Srodrigc 1131153323Srodrigc /* 1132153323Srodrigc * All inodes from this mount point should be freed. 1133153323Srodrigc */ 1134159451Srodrigc //ASSERT(mp->m_inodes == NULL); 1135159451Srodrigc if (mp->m_inodes != NULL ) { 1136159451Srodrigc printf("WRONG: mp->m_ireclaims: %d\n", mp->m_ireclaims); 1137159451Srodrigc printf("WRONG: mp->m_inodes: %p\n", mp->m_inodes); 1138153323Srodrigc } 1139153323Srodrigc 1140153323Srodrigc xfs_unmountfs_close(mp, cr); 1141153323Srodrigc if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) 1142153323Srodrigc xfs_uuid_unmount(mp); 1143153323Srodrigc 1144153323Srodrigc#if defined(DEBUG) || defined(INDUCE_IO_ERROR) 1145153323Srodrigc /* 1146153323Srodrigc * clear all error tags on this filesystem 1147153323Srodrigc */ 1148153323Srodrigc memcpy(&fsid, &vfsp->vfs_fsid, sizeof(int64_t)); 1149153323Srodrigc xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0); 1150153323Srodrigc#endif 1151153323Srodrigc XFS_IODONE(vfsp); 1152153323Srodrigc xfs_mount_free(mp, 1); 1153153323Srodrigc return 0; 1154153323Srodrigc} 1155153323Srodrigc 1156153323Srodrigcvoid 1157153323Srodrigcxfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr) 1158153323Srodrigc{ 1159159451Srodrigc if (mp->m_logdev_targp != mp->m_ddev_targp) 1160159451Srodrigc xfs_free_buftarg(mp->m_logdev_targp, 1); 1161159451Srodrigc if (mp->m_rtdev_targp) 1162159451Srodrigc xfs_free_buftarg(mp->m_rtdev_targp, 1); 
1163159451Srodrigc xfs_free_buftarg(mp->m_ddev_targp, 0); 1164159451Srodrigc} 1165153323Srodrigc 1166159451SrodrigcSTATIC void 1167159451Srodrigcxfs_unmountfs_wait(xfs_mount_t *mp) 1168159451Srodrigc{ 1169159451Srodrigc if (mp->m_logdev_targp != mp->m_ddev_targp) 1170159451Srodrigc xfs_wait_buftarg(mp->m_logdev_targp); 1171159451Srodrigc if (mp->m_rtdev_targp) 1172159451Srodrigc xfs_wait_buftarg(mp->m_rtdev_targp); 1173159451Srodrigc xfs_wait_buftarg(mp->m_ddev_targp); 1174153323Srodrigc} 1175153323Srodrigc 1176153323Srodrigcint 1177153323Srodrigcxfs_unmountfs_writesb(xfs_mount_t *mp) 1178153323Srodrigc{ 1179153323Srodrigc xfs_buf_t *sbp; 1180153323Srodrigc xfs_sb_t *sb; 1181153323Srodrigc int error = 0; 1182153323Srodrigc 1183153323Srodrigc /* 1184153323Srodrigc * skip superblock write if fs is read-only, or 1185153323Srodrigc * if we are doing a forced umount. 1186153323Srodrigc */ 1187153323Srodrigc sbp = xfs_getsb(mp, 0); 1188153323Srodrigc if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY || 1189153323Srodrigc XFS_FORCED_SHUTDOWN(mp))) { 1190159451Srodrigc 1191159451Srodrigc xfs_icsb_sync_counters(mp); 1192159451Srodrigc 1193153323Srodrigc /* 1194153323Srodrigc * mark shared-readonly if desired 1195153323Srodrigc */ 1196153323Srodrigc sb = XFS_BUF_TO_SBP(sbp); 1197153323Srodrigc if (mp->m_mk_sharedro) { 1198153323Srodrigc if (!(sb->sb_flags & XFS_SBF_READONLY)) 1199153323Srodrigc sb->sb_flags |= XFS_SBF_READONLY; 1200153323Srodrigc if (!XFS_SB_VERSION_HASSHARED(sb)) 1201153323Srodrigc XFS_SB_VERSION_ADDSHARED(sb); 1202153323Srodrigc xfs_fs_cmn_err(CE_NOTE, mp, 1203153323Srodrigc "Unmounting, marking shared read-only"); 1204153323Srodrigc } 1205159451Srodrigc XFS_BUF_UNDONE(sbp); 1206153323Srodrigc XFS_BUF_UNREAD(sbp); 1207153323Srodrigc XFS_BUF_UNDELAYWRITE(sbp); 1208153323Srodrigc XFS_BUF_WRITE(sbp); 1209153323Srodrigc XFS_BUF_UNASYNC(sbp); 1210153323Srodrigc ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); 1211153323Srodrigc xfsbdstrat(mp, sbp); 1212153323Srodrigc 
/* Nevermind errors we might get here. */ 1213153323Srodrigc error = xfs_iowait(sbp); 1214153323Srodrigc if (error) 1215153323Srodrigc xfs_ioerror_alert("xfs_unmountfs_writesb", 1216153323Srodrigc mp, sbp, XFS_BUF_ADDR(sbp)); 1217153323Srodrigc if (error && mp->m_mk_sharedro) 1218153323Srodrigc xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly"); 1219153323Srodrigc } 1220153323Srodrigc xfs_buf_relse(sbp); 1221159451Srodrigc return error; 1222153323Srodrigc} 1223153323Srodrigc 1224153323Srodrigc/* 1225153323Srodrigc * xfs_mod_sb() can be used to copy arbitrary changes to the 1226153323Srodrigc * in-core superblock into the superblock buffer to be logged. 1227153323Srodrigc * It does not provide the higher level of locking that is 1228153323Srodrigc * needed to protect the in-core superblock from concurrent 1229153323Srodrigc * access. 1230153323Srodrigc */ 1231153323Srodrigcvoid 1232153323Srodrigcxfs_mod_sb(xfs_trans_t *tp, __int64_t fields) 1233153323Srodrigc{ 1234153323Srodrigc xfs_buf_t *bp; 1235153323Srodrigc int first; 1236153323Srodrigc int last; 1237153323Srodrigc xfs_mount_t *mp; 1238153323Srodrigc xfs_sb_t *sbp; 1239153323Srodrigc xfs_sb_field_t f; 1240153323Srodrigc 1241153323Srodrigc ASSERT(fields); 1242153323Srodrigc if (!fields) 1243153323Srodrigc return; 1244153323Srodrigc mp = tp->t_mountp; 1245153323Srodrigc bp = xfs_trans_getsb(tp, mp, 0); 1246153323Srodrigc sbp = XFS_BUF_TO_SBP(bp); 1247153323Srodrigc first = sizeof(xfs_sb_t); 1248153323Srodrigc last = 0; 1249153323Srodrigc 1250153323Srodrigc /* translate/copy */ 1251153323Srodrigc 1252159451Srodrigc xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), -1, fields); 1253153323Srodrigc 1254153323Srodrigc /* find modified range */ 1255153323Srodrigc 1256153323Srodrigc f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); 1257153323Srodrigc ASSERT((1LL << f) & XFS_SB_MOD_BITS); 1258153323Srodrigc first = xfs_sb_info[f].offset; 
1259153323Srodrigc 1260153323Srodrigc f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields); 1261153323Srodrigc ASSERT((1LL << f) & XFS_SB_MOD_BITS); 1262153323Srodrigc last = xfs_sb_info[f + 1].offset - 1; 1263153323Srodrigc 1264153323Srodrigc xfs_trans_log_buf(tp, bp, first, last); 1265153323Srodrigc} 1266153323Srodrigc/* 1267153323Srodrigc * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply 1268153323Srodrigc * a delta to a specified field in the in-core superblock. Simply 1269153323Srodrigc * switch on the field indicated and apply the delta to that field. 1270153323Srodrigc * Fields are not allowed to dip below zero, so if the delta would 1271153323Srodrigc * do this do not apply it and return EINVAL. 1272153323Srodrigc * 1273153323Srodrigc * The SB_LOCK must be held when this routine is called. 1274153323Srodrigc */ 1275159451Srodrigcint 1276153323Srodrigcxfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, 1277153323Srodrigc int delta, int rsvd) 1278153323Srodrigc{ 1279153323Srodrigc int scounter; /* short counter for 32 bit fields */ 1280153323Srodrigc long long lcounter; /* long counter for 64 bit fields */ 1281153323Srodrigc long long res_used, rem; 1282153323Srodrigc 1283153323Srodrigc /* 1284153323Srodrigc * With the in-core superblock spin lock held, switch 1285153323Srodrigc * on the indicated field. Apply the delta to the 1286153323Srodrigc * proper field. If the fields value would dip below 1287153323Srodrigc * 0, then do not apply the delta and return EINVAL. 
1288153323Srodrigc */ 1289153323Srodrigc switch (field) { 1290153323Srodrigc case XFS_SBS_ICOUNT: 1291153323Srodrigc lcounter = (long long)mp->m_sb.sb_icount; 1292153323Srodrigc lcounter += delta; 1293153323Srodrigc if (lcounter < 0) { 1294153323Srodrigc ASSERT(0); 1295159451Srodrigc return XFS_ERROR(EINVAL); 1296153323Srodrigc } 1297153323Srodrigc mp->m_sb.sb_icount = lcounter; 1298159451Srodrigc return 0; 1299153323Srodrigc case XFS_SBS_IFREE: 1300153323Srodrigc lcounter = (long long)mp->m_sb.sb_ifree; 1301153323Srodrigc lcounter += delta; 1302153323Srodrigc if (lcounter < 0) { 1303153323Srodrigc ASSERT(0); 1304159451Srodrigc return XFS_ERROR(EINVAL); 1305153323Srodrigc } 1306153323Srodrigc mp->m_sb.sb_ifree = lcounter; 1307159451Srodrigc return 0; 1308153323Srodrigc case XFS_SBS_FDBLOCKS: 1309153323Srodrigc 1310153323Srodrigc lcounter = (long long)mp->m_sb.sb_fdblocks; 1311153323Srodrigc res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); 1312153323Srodrigc 1313153323Srodrigc if (delta > 0) { /* Putting blocks back */ 1314153323Srodrigc if (res_used > delta) { 1315153323Srodrigc mp->m_resblks_avail += delta; 1316153323Srodrigc } else { 1317153323Srodrigc rem = delta - res_used; 1318153323Srodrigc mp->m_resblks_avail = mp->m_resblks; 1319153323Srodrigc lcounter += rem; 1320153323Srodrigc } 1321153323Srodrigc } else { /* Taking blocks away */ 1322153323Srodrigc 1323153323Srodrigc lcounter += delta; 1324153323Srodrigc 1325153323Srodrigc /* 1326153323Srodrigc * If were out of blocks, use any available reserved blocks if 1327153323Srodrigc * were allowed to. 
1328153323Srodrigc */ 1329153323Srodrigc 1330153323Srodrigc if (lcounter < 0) { 1331153323Srodrigc if (rsvd) { 1332153323Srodrigc lcounter = (long long)mp->m_resblks_avail + delta; 1333153323Srodrigc if (lcounter < 0) { 1334159451Srodrigc return XFS_ERROR(ENOSPC); 1335153323Srodrigc } 1336153323Srodrigc mp->m_resblks_avail = lcounter; 1337159451Srodrigc return 0; 1338153323Srodrigc } else { /* not reserved */ 1339159451Srodrigc return XFS_ERROR(ENOSPC); 1340153323Srodrigc } 1341153323Srodrigc } 1342153323Srodrigc } 1343153323Srodrigc 1344153323Srodrigc mp->m_sb.sb_fdblocks = lcounter; 1345159451Srodrigc return 0; 1346153323Srodrigc case XFS_SBS_FREXTENTS: 1347153323Srodrigc lcounter = (long long)mp->m_sb.sb_frextents; 1348153323Srodrigc lcounter += delta; 1349153323Srodrigc if (lcounter < 0) { 1350159451Srodrigc return XFS_ERROR(ENOSPC); 1351153323Srodrigc } 1352153323Srodrigc mp->m_sb.sb_frextents = lcounter; 1353159451Srodrigc return 0; 1354153323Srodrigc case XFS_SBS_DBLOCKS: 1355153323Srodrigc lcounter = (long long)mp->m_sb.sb_dblocks; 1356153323Srodrigc lcounter += delta; 1357153323Srodrigc if (lcounter < 0) { 1358153323Srodrigc ASSERT(0); 1359159451Srodrigc return XFS_ERROR(EINVAL); 1360153323Srodrigc } 1361153323Srodrigc mp->m_sb.sb_dblocks = lcounter; 1362159451Srodrigc return 0; 1363153323Srodrigc case XFS_SBS_AGCOUNT: 1364153323Srodrigc scounter = mp->m_sb.sb_agcount; 1365153323Srodrigc scounter += delta; 1366153323Srodrigc if (scounter < 0) { 1367153323Srodrigc ASSERT(0); 1368159451Srodrigc return XFS_ERROR(EINVAL); 1369153323Srodrigc } 1370153323Srodrigc mp->m_sb.sb_agcount = scounter; 1371159451Srodrigc return 0; 1372153323Srodrigc case XFS_SBS_IMAX_PCT: 1373153323Srodrigc scounter = mp->m_sb.sb_imax_pct; 1374153323Srodrigc scounter += delta; 1375153323Srodrigc if (scounter < 0) { 1376153323Srodrigc ASSERT(0); 1377159451Srodrigc return XFS_ERROR(EINVAL); 1378153323Srodrigc } 1379153323Srodrigc mp->m_sb.sb_imax_pct = scounter; 1380159451Srodrigc return 
0; 1381153323Srodrigc case XFS_SBS_REXTSIZE: 1382153323Srodrigc scounter = mp->m_sb.sb_rextsize; 1383153323Srodrigc scounter += delta; 1384153323Srodrigc if (scounter < 0) { 1385153323Srodrigc ASSERT(0); 1386159451Srodrigc return XFS_ERROR(EINVAL); 1387153323Srodrigc } 1388153323Srodrigc mp->m_sb.sb_rextsize = scounter; 1389159451Srodrigc return 0; 1390153323Srodrigc case XFS_SBS_RBMBLOCKS: 1391153323Srodrigc scounter = mp->m_sb.sb_rbmblocks; 1392153323Srodrigc scounter += delta; 1393153323Srodrigc if (scounter < 0) { 1394153323Srodrigc ASSERT(0); 1395159451Srodrigc return XFS_ERROR(EINVAL); 1396153323Srodrigc } 1397153323Srodrigc mp->m_sb.sb_rbmblocks = scounter; 1398159451Srodrigc return 0; 1399153323Srodrigc case XFS_SBS_RBLOCKS: 1400153323Srodrigc lcounter = (long long)mp->m_sb.sb_rblocks; 1401153323Srodrigc lcounter += delta; 1402153323Srodrigc if (lcounter < 0) { 1403153323Srodrigc ASSERT(0); 1404159451Srodrigc return XFS_ERROR(EINVAL); 1405153323Srodrigc } 1406153323Srodrigc mp->m_sb.sb_rblocks = lcounter; 1407159451Srodrigc return 0; 1408153323Srodrigc case XFS_SBS_REXTENTS: 1409153323Srodrigc lcounter = (long long)mp->m_sb.sb_rextents; 1410153323Srodrigc lcounter += delta; 1411153323Srodrigc if (lcounter < 0) { 1412153323Srodrigc ASSERT(0); 1413159451Srodrigc return XFS_ERROR(EINVAL); 1414153323Srodrigc } 1415153323Srodrigc mp->m_sb.sb_rextents = lcounter; 1416159451Srodrigc return 0; 1417153323Srodrigc case XFS_SBS_REXTSLOG: 1418153323Srodrigc scounter = mp->m_sb.sb_rextslog; 1419153323Srodrigc scounter += delta; 1420153323Srodrigc if (scounter < 0) { 1421153323Srodrigc ASSERT(0); 1422159451Srodrigc return XFS_ERROR(EINVAL); 1423153323Srodrigc } 1424153323Srodrigc mp->m_sb.sb_rextslog = scounter; 1425159451Srodrigc return 0; 1426153323Srodrigc default: 1427153323Srodrigc ASSERT(0); 1428159451Srodrigc return XFS_ERROR(EINVAL); 1429153323Srodrigc } 1430153323Srodrigc} 1431153323Srodrigc 1432153323Srodrigc/* 1433153323Srodrigc * xfs_mod_incore_sb() is used 
to change a field in the in-core 1434153323Srodrigc * superblock structure by the specified delta. This modification 1435153323Srodrigc * is protected by the SB_LOCK. Just use the xfs_mod_incore_sb_unlocked() 1436153323Srodrigc * routine to do the work. 1437153323Srodrigc */ 1438153323Srodrigcint 1439153323Srodrigcxfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd) 1440153323Srodrigc{ 1441153323Srodrigc unsigned long s; 1442153323Srodrigc int status; 1443153323Srodrigc 1444159451Srodrigc /* check for per-cpu counters */ 1445159451Srodrigc switch (field) { 1446159451Srodrigc#ifdef HAVE_PERCPU_SB 1447159451Srodrigc case XFS_SBS_ICOUNT: 1448159451Srodrigc case XFS_SBS_IFREE: 1449159451Srodrigc case XFS_SBS_FDBLOCKS: 1450159451Srodrigc if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { 1451159451Srodrigc status = xfs_icsb_modify_counters(mp, field, 1452159451Srodrigc delta, rsvd); 1453159451Srodrigc break; 1454159451Srodrigc } 1455159451Srodrigc /* FALLTHROUGH */ 1456159451Srodrigc#endif 1457159451Srodrigc default: 1458159451Srodrigc s = XFS_SB_LOCK(mp); 1459159451Srodrigc status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); 1460159451Srodrigc XFS_SB_UNLOCK(mp, s); 1461159451Srodrigc break; 1462159451Srodrigc } 1463159451Srodrigc 1464159451Srodrigc return status; 1465153323Srodrigc} 1466153323Srodrigc 1467153323Srodrigc/* 1468153323Srodrigc * xfs_mod_incore_sb_batch() is used to change more than one field 1469153323Srodrigc * in the in-core superblock structure at a time. This modification 1470153323Srodrigc * is protected by a lock internal to this module. The fields and 1471153323Srodrigc * changes to those fields are specified in the array of xfs_mod_sb 1472153323Srodrigc * structures passed in. 1473153323Srodrigc * 1474153323Srodrigc * Either all of the specified deltas will be applied or none of 1475153323Srodrigc * them will. 
If any modified field dips below 0, then all modifications 1476153323Srodrigc * will be backed out and EINVAL will be returned. 1477153323Srodrigc */ 1478153323Srodrigcint 1479153323Srodrigcxfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) 1480153323Srodrigc{ 1481153323Srodrigc unsigned long s; 1482153323Srodrigc int status=0; 1483153323Srodrigc xfs_mod_sb_t *msbp; 1484153323Srodrigc 1485153323Srodrigc /* 1486153323Srodrigc * Loop through the array of mod structures and apply each 1487153323Srodrigc * individually. If any fail, then back out all those 1488153323Srodrigc * which have already been applied. Do all of this within 1489153323Srodrigc * the scope of the SB_LOCK so that all of the changes will 1490153323Srodrigc * be atomic. 1491153323Srodrigc */ 1492153323Srodrigc s = XFS_SB_LOCK(mp); 1493153323Srodrigc msbp = &msb[0]; 1494153323Srodrigc for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { 1495153323Srodrigc /* 1496153323Srodrigc * Apply the delta at index n. If it fails, break 1497153323Srodrigc * from the loop so we'll fall into the undo loop 1498153323Srodrigc * below. 
1499153323Srodrigc */ 1500159451Srodrigc switch (msbp->msb_field) { 1501159451Srodrigc#ifdef HAVE_PERCPU_SB 1502159451Srodrigc case XFS_SBS_ICOUNT: 1503159451Srodrigc case XFS_SBS_IFREE: 1504159451Srodrigc case XFS_SBS_FDBLOCKS: 1505159451Srodrigc if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { 1506159451Srodrigc status = xfs_icsb_modify_counters_locked(mp, 1507159451Srodrigc msbp->msb_field, 1508159451Srodrigc msbp->msb_delta, rsvd); 1509159451Srodrigc break; 1510159451Srodrigc } 1511159451Srodrigc /* FALLTHROUGH */ 1512159451Srodrigc#endif 1513159451Srodrigc default: 1514159451Srodrigc status = xfs_mod_incore_sb_unlocked(mp, 1515159451Srodrigc msbp->msb_field, 1516159451Srodrigc msbp->msb_delta, rsvd); 1517159451Srodrigc break; 1518159451Srodrigc } 1519159451Srodrigc 1520153323Srodrigc if (status != 0) { 1521153323Srodrigc break; 1522153323Srodrigc } 1523153323Srodrigc } 1524153323Srodrigc 1525153323Srodrigc /* 1526153323Srodrigc * If we didn't complete the loop above, then back out 1527153323Srodrigc * any changes made to the superblock. If you add code 1528153323Srodrigc * between the loop above and here, make sure that you 1529153323Srodrigc * preserve the value of status. Loop back until 1530153323Srodrigc * we step below the beginning of the array. Make sure 1531153323Srodrigc * we don't touch anything back there. 
1532153323Srodrigc */ 1533153323Srodrigc if (status != 0) { 1534153323Srodrigc msbp--; 1535153323Srodrigc while (msbp >= msb) { 1536159451Srodrigc switch (msbp->msb_field) { 1537159451Srodrigc#ifdef HAVE_PERCPU_SB 1538159451Srodrigc case XFS_SBS_ICOUNT: 1539159451Srodrigc case XFS_SBS_IFREE: 1540159451Srodrigc case XFS_SBS_FDBLOCKS: 1541159451Srodrigc if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { 1542159451Srodrigc status = 1543159451Srodrigc xfs_icsb_modify_counters_locked(mp, 1544159451Srodrigc msbp->msb_field, 1545159451Srodrigc -(msbp->msb_delta), 1546159451Srodrigc rsvd); 1547159451Srodrigc break; 1548159451Srodrigc } 1549159451Srodrigc /* FALLTHROUGH */ 1550159451Srodrigc#endif 1551159451Srodrigc default: 1552159451Srodrigc status = xfs_mod_incore_sb_unlocked(mp, 1553159451Srodrigc msbp->msb_field, 1554159451Srodrigc -(msbp->msb_delta), 1555159451Srodrigc rsvd); 1556159451Srodrigc break; 1557159451Srodrigc } 1558153323Srodrigc ASSERT(status == 0); 1559153323Srodrigc msbp--; 1560153323Srodrigc } 1561153323Srodrigc } 1562153323Srodrigc XFS_SB_UNLOCK(mp, s); 1563159451Srodrigc return status; 1564153323Srodrigc} 1565153323Srodrigc 1566153323Srodrigc/* 1567153323Srodrigc * xfs_getsb() is called to obtain the buffer for the superblock. 1568153323Srodrigc * The buffer is returned locked and read in from disk. 1569153323Srodrigc * The buffer should be released with a call to xfs_brelse(). 1570153323Srodrigc * 1571153323Srodrigc * If the flags parameter is BUF_TRYLOCK, then we'll only return 1572153323Srodrigc * the superblock buffer if it can be locked without sleeping. 1573153323Srodrigc * If it can't then we'll return NULL. 
1574153323Srodrigc */ 1575153323Srodrigcxfs_buf_t * 1576153323Srodrigcxfs_getsb( 1577153323Srodrigc xfs_mount_t *mp, 1578153323Srodrigc int flags) 1579153323Srodrigc{ 1580153323Srodrigc xfs_buf_t *bp; 1581159451Srodrigc int extra_flags = 0; 1582159451Srodrigc unsigned int sector_size; 1583153323Srodrigc 1584159451Srodrigc 1585153323Srodrigc bp = mp->m_sb_bp; 1586159451Srodrigc sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); 1587159451Srodrigc#ifdef NOT 1588159451Srodrigc /* MANAGED buf's appear broken in FreeBSD 1589159451Srodrigc * but it's unclear if we need a persistant superblock? 1590159451Srodrigc * since we now translate the ondisk superblock to 1591159451Srodrigc * a separate translated structure and then translate that 1592159451Srodrigc * structure back when we want to write the superblock 1593159451Srodrigc */ 1594159451Srodrigc extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; 1595159451Srodrigc extra_flags = XFS_BUF_MANAGE; 1596159451Srodrigc#endif 1597159451Srodrigc 1598159451Srodrigc mp->m_sb_bp = bp 1599159451Srodrigc = xfs_buf_read_flags(mp->m_ddev_targp, 1600159451Srodrigc XFS_SB_DADDR, 1601159451Srodrigc BTOBB(sector_size), 1602159451Srodrigc extra_flags); 1603159451Srodrigc 1604153323Srodrigc XFS_BUF_HOLD(bp); 1605153323Srodrigc ASSERT(XFS_BUF_ISDONE(bp)); 1606159451Srodrigc if (!XFS_BUF_ISDONE(bp)){ 1607159451Srodrigc printf("xfs_getsb: %p bp flags 0x%x\n",bp,bp->b_flags); 1608159451Srodrigc } 1609159451Srodrigc return bp; 1610153323Srodrigc} 1611153323Srodrigc 1612153323Srodrigc/* 1613153323Srodrigc * Used to free the superblock along various error paths. 1614153323Srodrigc */ 1615153323Srodrigcvoid 1616153323Srodrigcxfs_freesb( 1617153323Srodrigc xfs_mount_t *mp) 1618153323Srodrigc{ 1619153323Srodrigc xfs_buf_t *bp; 1620153323Srodrigc 1621153323Srodrigc /* 1622153323Srodrigc * Use xfs_getsb() so that the buffer will be locked 1623153323Srodrigc * when we call xfs_buf_relse(). 
1624153323Srodrigc */ 1625153323Srodrigc bp = xfs_getsb(mp, 0); 1626153323Srodrigc XFS_BUF_UNMANAGE(bp); 1627153323Srodrigc xfs_buf_relse(bp); 1628153323Srodrigc mp->m_sb_bp = NULL; 1629153323Srodrigc} 1630153323Srodrigc 1631153323Srodrigc/* 1632153323Srodrigc * See if the UUID is unique among mounted XFS filesystems. 1633153323Srodrigc * Mount fails if UUID is nil or a FS with the same UUID is already mounted. 1634153323Srodrigc */ 1635153323SrodrigcSTATIC int 1636153323Srodrigcxfs_uuid_mount( 1637153323Srodrigc xfs_mount_t *mp) 1638153323Srodrigc{ 1639153323Srodrigc if (uuid_is_nil(&mp->m_sb.sb_uuid)) { 1640153323Srodrigc cmn_err(CE_WARN, 1641153323Srodrigc "XFS: Filesystem %s has nil UUID - can't mount", 1642153323Srodrigc mp->m_fsname); 1643153323Srodrigc return -1; 1644153323Srodrigc } 1645153323Srodrigc if (!uuid_table_insert(&mp->m_sb.sb_uuid)) { 1646153323Srodrigc cmn_err(CE_WARN, 1647153323Srodrigc "XFS: Filesystem %s has duplicate UUID - can't mount", 1648153323Srodrigc mp->m_fsname); 1649153323Srodrigc return -1; 1650153323Srodrigc } 1651153323Srodrigc return 0; 1652153323Srodrigc} 1653153323Srodrigc 1654153323Srodrigc/* 1655153323Srodrigc * Remove filesystem from the UUID table. 1656153323Srodrigc */ 1657153323SrodrigcSTATIC void 1658153323Srodrigcxfs_uuid_unmount( 1659153323Srodrigc xfs_mount_t *mp) 1660153323Srodrigc{ 1661153323Srodrigc uuid_table_remove(&mp->m_sb.sb_uuid); 1662153323Srodrigc} 1663153323Srodrigc 1664153323Srodrigc/* 1665153323Srodrigc * Used to log changes to the superblock unit and width fields which could 1666153323Srodrigc * be altered by the mount options. Only the first superblock is updated. 
1667153323Srodrigc */ 1668153323SrodrigcSTATIC void 1669153323Srodrigcxfs_mount_log_sbunit( 1670153323Srodrigc xfs_mount_t *mp, 1671153323Srodrigc __int64_t fields) 1672153323Srodrigc{ 1673153323Srodrigc xfs_trans_t *tp; 1674153323Srodrigc 1675153323Srodrigc ASSERT(fields & (XFS_SB_UNIT|XFS_SB_WIDTH|XFS_SB_UUID)); 1676153323Srodrigc 1677153323Srodrigc tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); 1678153323Srodrigc if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1679153323Srodrigc XFS_DEFAULT_LOG_COUNT)) { 1680153323Srodrigc xfs_trans_cancel(tp, 0); 1681153323Srodrigc return; 1682153323Srodrigc } 1683153323Srodrigc xfs_mod_sb(tp, fields); 1684153323Srodrigc xfs_trans_commit(tp, 0, NULL); 1685153323Srodrigc} 1686153323Srodrigc 1687159451Srodrigc 1688159451Srodrigc#ifdef HAVE_PERCPU_SB 1689159451Srodrigc/* 1690159451Srodrigc * Per-cpu incore superblock counters 1691159451Srodrigc * 1692159451Srodrigc * Simple concept, difficult implementation 1693159451Srodrigc * 1694159451Srodrigc * Basically, replace the incore superblock counters with a distributed per cpu 1695159451Srodrigc * counter for contended fields (e.g. free block count). 1696159451Srodrigc * 1697159451Srodrigc * Difficulties arise in that the incore sb is used for ENOSPC checking, and 1698159451Srodrigc * hence needs to be accurately read when we are running low on space. Hence 1699159451Srodrigc * there is a method to enable and disable the per-cpu counters based on how 1700159451Srodrigc * much "stuff" is available in them. 1701159451Srodrigc * 1702159451Srodrigc * Basically, a counter is enabled if there is enough free resource to justify 1703159451Srodrigc * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local 1704159451Srodrigc * ENOSPC), then we disable the counters to synchronise all callers and 1705159451Srodrigc * re-distribute the available resources. 
*
 * If, once we have redistributed the available resources, we still get a
 * failure, we disable the per-cpu counter and go through the slow path.
 *
 * The slow path is the current xfs_mod_incore_sb() function.  This means that
 * when we disable a per-cpu counter, we need to drain its resources back to
 * the global superblock. We do this after disabling the counter to prevent
 * more threads from queueing up on the counter.
 *
 * Essentially, this means that we still need a lock in the fast path to enable
 * synchronisation between the global counters and the per-cpu counters. This
 * is not a problem because the lock will be local to a CPU almost all the time
 * and have little contention except when we get to ENOSPC conditions.
 *
 * Basically, this lock becomes a barrier that enables us to lock out the fast
 * path while we do things like enabling and disabling counters and
 * synchronising the counters.
 *
 * Locking rules:
 *
 *	1. XFS_SB_LOCK() before picking up per-cpu locks
 *	2. per-cpu locks always picked up via for_each_online_cpu() order
 *	3. accurate counter sync requires XFS_SB_LOCK + per cpu locks
 *	4. modifying per-cpu counters requires holding per-cpu lock
 *	5. modifying global counters requires holding XFS_SB_LOCK
 *	6. enabling or disabling a counter requires holding the XFS_SB_LOCK
 *	   and _none_ of the per-cpu locks.
1733159451Srodrigc * 1734159451Srodrigc * Disabled counters are only ever re-enabled by a balance operation 1735159451Srodrigc * that results in more free resources per CPU than a given threshold. 1736159451Srodrigc * To ensure counters don't remain disabled, they are rebalanced when 1737159451Srodrigc * the global resource goes above a higher threshold (i.e. some hysteresis 1738159451Srodrigc * is present to prevent thrashing). 1739153323Srodrigc */ 1740153323Srodrigc 1741159451Srodrigc/* 1742159451Srodrigc * hot-plug CPU notifier support. 1743159451Srodrigc * 1744159451Srodrigc * We cannot use the hotcpu_register() function because it does 1745159451Srodrigc * not allow notifier instances. We need a notifier per filesystem 1746159451Srodrigc * as we need to be able to identify the filesystem to balance 1747159451Srodrigc * the counters out. This is achieved by having a notifier block 1748159451Srodrigc * embedded in the xfs_mount_t and doing pointer magic to get the 1749159451Srodrigc * mount pointer from the notifier block address. 1750159451Srodrigc */ 1751159451SrodrigcSTATIC int 1752159451Srodrigcxfs_icsb_cpu_notify( 1753159451Srodrigc struct notifier_block *nfb, 1754159451Srodrigc unsigned long action, 1755159451Srodrigc void *hcpu) 1756159451Srodrigc{ 1757159451Srodrigc xfs_icsb_cnts_t *cntp; 1758159451Srodrigc xfs_mount_t *mp; 1759159451Srodrigc int s; 1760159451Srodrigc 1761159451Srodrigc mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier); 1762159451Srodrigc cntp = (xfs_icsb_cnts_t *) 1763159451Srodrigc per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu); 1764159451Srodrigc switch (action) { 1765159451Srodrigc case CPU_UP_PREPARE: 1766159451Srodrigc /* Easy Case - initialize the area and locks, and 1767159451Srodrigc * then rebalance when online does everything else for us. 
*/ 1768159451Srodrigc memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 1769159451Srodrigc break; 1770159451Srodrigc case CPU_ONLINE: 1771159451Srodrigc xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); 1772159451Srodrigc xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); 1773159451Srodrigc xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); 1774159451Srodrigc break; 1775159451Srodrigc case CPU_DEAD: 1776159451Srodrigc /* Disable all the counters, then fold the dead cpu's 1777159451Srodrigc * count into the total on the global superblock and 1778159451Srodrigc * re-enable the counters. */ 1779159451Srodrigc s = XFS_SB_LOCK(mp); 1780159451Srodrigc xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT); 1781159451Srodrigc xfs_icsb_disable_counter(mp, XFS_SBS_IFREE); 1782159451Srodrigc xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS); 1783159451Srodrigc 1784159451Srodrigc mp->m_sb.sb_icount += cntp->icsb_icount; 1785159451Srodrigc mp->m_sb.sb_ifree += cntp->icsb_ifree; 1786159451Srodrigc mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks; 1787159451Srodrigc 1788159451Srodrigc memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 1789159451Srodrigc 1790159451Srodrigc xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED); 1791159451Srodrigc xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED); 1792159451Srodrigc xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, XFS_ICSB_SB_LOCKED); 1793159451Srodrigc XFS_SB_UNLOCK(mp, s); 1794159451Srodrigc break; 1795159451Srodrigc } 1796159451Srodrigc 1797159451Srodrigc return NOTIFY_OK; 1798159451Srodrigc} 1799159451Srodrigc 1800159451Srodrigcint 1801159451Srodrigcxfs_icsb_init_counters( 1802159451Srodrigc xfs_mount_t *mp) 1803159451Srodrigc{ 1804159451Srodrigc xfs_icsb_cnts_t *cntp; 1805159451Srodrigc int i; 1806159451Srodrigc 1807159451Srodrigc mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t); 1808159451Srodrigc if (mp->m_sb_cnts == NULL) 1809159451Srodrigc return -ENOMEM; 1810159451Srodrigc 1811159451Srodrigc mp->m_icsb_notifier.notifier_call = 
xfs_icsb_cpu_notify; 1812159451Srodrigc mp->m_icsb_notifier.priority = 0; 1813159451Srodrigc register_cpu_notifier(&mp->m_icsb_notifier); 1814159451Srodrigc 1815159451Srodrigc for_each_online_cpu(i) { 1816159451Srodrigc cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 1817159451Srodrigc memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 1818159451Srodrigc } 1819159451Srodrigc /* 1820159451Srodrigc * start with all counters disabled so that the 1821159451Srodrigc * initial balance kicks us off correctly 1822159451Srodrigc */ 1823159451Srodrigc mp->m_icsb_counters = -1; 1824159451Srodrigc return 0; 1825159451Srodrigc} 1826159451Srodrigc 1827159451SrodrigcSTATIC void 1828159451Srodrigcxfs_icsb_destroy_counters( 1829159451Srodrigc xfs_mount_t *mp) 1830159451Srodrigc{ 1831159451Srodrigc if (mp->m_sb_cnts) { 1832159451Srodrigc unregister_cpu_notifier(&mp->m_icsb_notifier); 1833159451Srodrigc free_percpu(mp->m_sb_cnts); 1834159451Srodrigc } 1835159451Srodrigc} 1836159451Srodrigc 1837159451SrodrigcSTATIC inline void 1838159451Srodrigcxfs_icsb_lock_cntr( 1839159451Srodrigc xfs_icsb_cnts_t *icsbp) 1840159451Srodrigc{ 1841159451Srodrigc while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) { 1842159451Srodrigc ndelay(1000); 1843159451Srodrigc } 1844159451Srodrigc} 1845159451Srodrigc 1846159451SrodrigcSTATIC inline void 1847159451Srodrigcxfs_icsb_unlock_cntr( 1848159451Srodrigc xfs_icsb_cnts_t *icsbp) 1849159451Srodrigc{ 1850159451Srodrigc clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags); 1851159451Srodrigc} 1852159451Srodrigc 1853159451Srodrigc 1854159451SrodrigcSTATIC inline void 1855159451Srodrigcxfs_icsb_lock_all_counters( 1856159451Srodrigc xfs_mount_t *mp) 1857159451Srodrigc{ 1858159451Srodrigc xfs_icsb_cnts_t *cntp; 1859159451Srodrigc int i; 1860159451Srodrigc 1861159451Srodrigc for_each_online_cpu(i) { 1862159451Srodrigc cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 1863159451Srodrigc xfs_icsb_lock_cntr(cntp); 1864159451Srodrigc } 
1865159451Srodrigc} 1866159451Srodrigc 1867159451SrodrigcSTATIC inline void 1868159451Srodrigcxfs_icsb_unlock_all_counters( 1869159451Srodrigc xfs_mount_t *mp) 1870159451Srodrigc{ 1871159451Srodrigc xfs_icsb_cnts_t *cntp; 1872159451Srodrigc int i; 1873159451Srodrigc 1874159451Srodrigc for_each_online_cpu(i) { 1875159451Srodrigc cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 1876159451Srodrigc xfs_icsb_unlock_cntr(cntp); 1877159451Srodrigc } 1878159451Srodrigc} 1879159451Srodrigc 1880159451SrodrigcSTATIC void 1881159451Srodrigcxfs_icsb_count( 1882153323Srodrigc xfs_mount_t *mp, 1883159451Srodrigc xfs_icsb_cnts_t *cnt, 1884159451Srodrigc int flags) 1885153323Srodrigc{ 1886159451Srodrigc xfs_icsb_cnts_t *cntp; 1887159451Srodrigc int i; 1888153323Srodrigc 1889159451Srodrigc memset(cnt, 0, sizeof(xfs_icsb_cnts_t)); 1890153323Srodrigc 1891159451Srodrigc if (!(flags & XFS_ICSB_LAZY_COUNT)) 1892159451Srodrigc xfs_icsb_lock_all_counters(mp); 1893159451Srodrigc 1894159451Srodrigc for_each_online_cpu(i) { 1895159451Srodrigc cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 1896159451Srodrigc cnt->icsb_icount += cntp->icsb_icount; 1897159451Srodrigc cnt->icsb_ifree += cntp->icsb_ifree; 1898159451Srodrigc cnt->icsb_fdblocks += cntp->icsb_fdblocks; 1899153323Srodrigc } 1900159451Srodrigc 1901159451Srodrigc if (!(flags & XFS_ICSB_LAZY_COUNT)) 1902159451Srodrigc xfs_icsb_unlock_all_counters(mp); 1903153323Srodrigc} 1904153323Srodrigc 1905159451SrodrigcSTATIC int 1906159451Srodrigcxfs_icsb_counter_disabled( 1907159451Srodrigc xfs_mount_t *mp, 1908159451Srodrigc xfs_sb_field_t field) 1909159451Srodrigc{ 1910159451Srodrigc ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); 1911159451Srodrigc return test_bit(field, &mp->m_icsb_counters); 1912159451Srodrigc} 1913159451Srodrigc 1914159451SrodrigcSTATIC int 1915159451Srodrigcxfs_icsb_disable_counter( 1916159451Srodrigc xfs_mount_t *mp, 1917159451Srodrigc xfs_sb_field_t field) 1918159451Srodrigc{ 
1919159451Srodrigc xfs_icsb_cnts_t cnt; 1920159451Srodrigc 1921159451Srodrigc ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); 1922159451Srodrigc 1923159451Srodrigc xfs_icsb_lock_all_counters(mp); 1924159451Srodrigc if (!test_and_set_bit(field, &mp->m_icsb_counters)) { 1925159451Srodrigc /* drain back to superblock */ 1926159451Srodrigc 1927159451Srodrigc xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT); 1928159451Srodrigc switch(field) { 1929159451Srodrigc case XFS_SBS_ICOUNT: 1930159451Srodrigc mp->m_sb.sb_icount = cnt.icsb_icount; 1931159451Srodrigc break; 1932159451Srodrigc case XFS_SBS_IFREE: 1933159451Srodrigc mp->m_sb.sb_ifree = cnt.icsb_ifree; 1934159451Srodrigc break; 1935159451Srodrigc case XFS_SBS_FDBLOCKS: 1936159451Srodrigc mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; 1937159451Srodrigc break; 1938159451Srodrigc default: 1939159451Srodrigc BUG(); 1940159451Srodrigc } 1941159451Srodrigc } 1942159451Srodrigc 1943159451Srodrigc xfs_icsb_unlock_all_counters(mp); 1944159451Srodrigc 1945159451Srodrigc return 0; 1946159451Srodrigc} 1947159451Srodrigc 1948159451SrodrigcSTATIC void 1949159451Srodrigcxfs_icsb_enable_counter( 1950159451Srodrigc xfs_mount_t *mp, 1951159451Srodrigc xfs_sb_field_t field, 1952159451Srodrigc uint64_t count, 1953159451Srodrigc uint64_t resid) 1954159451Srodrigc{ 1955159451Srodrigc xfs_icsb_cnts_t *cntp; 1956159451Srodrigc int i; 1957159451Srodrigc 1958159451Srodrigc ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); 1959159451Srodrigc 1960159451Srodrigc xfs_icsb_lock_all_counters(mp); 1961159451Srodrigc for_each_online_cpu(i) { 1962159451Srodrigc cntp = per_cpu_ptr(mp->m_sb_cnts, i); 1963159451Srodrigc switch (field) { 1964159451Srodrigc case XFS_SBS_ICOUNT: 1965159451Srodrigc cntp->icsb_icount = count + resid; 1966159451Srodrigc break; 1967159451Srodrigc case XFS_SBS_IFREE: 1968159451Srodrigc cntp->icsb_ifree = count + resid; 1969159451Srodrigc break; 1970159451Srodrigc case 
XFS_SBS_FDBLOCKS: 1971159451Srodrigc cntp->icsb_fdblocks = count + resid; 1972159451Srodrigc break; 1973159451Srodrigc default: 1974159451Srodrigc BUG(); 1975159451Srodrigc break; 1976159451Srodrigc } 1977159451Srodrigc resid = 0; 1978159451Srodrigc } 1979159451Srodrigc clear_bit(field, &mp->m_icsb_counters); 1980159451Srodrigc xfs_icsb_unlock_all_counters(mp); 1981159451Srodrigc} 1982159451Srodrigc 1983159451SrodrigcSTATIC void 1984159451Srodrigcxfs_icsb_sync_counters_int( 1985159451Srodrigc xfs_mount_t *mp, 1986159451Srodrigc int flags) 1987159451Srodrigc{ 1988159451Srodrigc xfs_icsb_cnts_t cnt; 1989159451Srodrigc int s; 1990159451Srodrigc 1991159451Srodrigc /* Pass 1: lock all counters */ 1992159451Srodrigc if ((flags & XFS_ICSB_SB_LOCKED) == 0) 1993159451Srodrigc s = XFS_SB_LOCK(mp); 1994159451Srodrigc 1995159451Srodrigc xfs_icsb_count(mp, &cnt, flags); 1996159451Srodrigc 1997159451Srodrigc /* Step 3: update mp->m_sb fields */ 1998159451Srodrigc if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT)) 1999159451Srodrigc mp->m_sb.sb_icount = cnt.icsb_icount; 2000159451Srodrigc if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE)) 2001159451Srodrigc mp->m_sb.sb_ifree = cnt.icsb_ifree; 2002159451Srodrigc if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS)) 2003159451Srodrigc mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; 2004159451Srodrigc 2005159451Srodrigc if ((flags & XFS_ICSB_SB_LOCKED) == 0) 2006159451Srodrigc XFS_SB_UNLOCK(mp, s); 2007159451Srodrigc} 2008159451Srodrigc 2009159451Srodrigc/* 2010159451Srodrigc * Accurate update of per-cpu counters to incore superblock 2011159451Srodrigc */ 2012159451SrodrigcSTATIC void 2013159451Srodrigcxfs_icsb_sync_counters( 2014159451Srodrigc xfs_mount_t *mp) 2015159451Srodrigc{ 2016159451Srodrigc xfs_icsb_sync_counters_int(mp, 0); 2017159451Srodrigc} 2018159451Srodrigc 2019159451Srodrigc/* 2020159451Srodrigc * lazy addition used for things like df, background sb syncs, etc 2021159451Srodrigc */ 2022153323Srodrigcvoid 
2023159451Srodrigcxfs_icsb_sync_counters_lazy( 2024153323Srodrigc xfs_mount_t *mp) 2025153323Srodrigc{ 2026159451Srodrigc xfs_icsb_sync_counters_int(mp, XFS_ICSB_LAZY_COUNT); 2027159451Srodrigc} 2028153323Srodrigc 2029159451Srodrigc/* 2030159451Srodrigc * Balance and enable/disable counters as necessary. 2031159451Srodrigc * 2032159451Srodrigc * Thresholds for re-enabling counters are somewhat magic. 2033159451Srodrigc * inode counts are chosen to be the same number as single 2034159451Srodrigc * on disk allocation chunk per CPU, and free blocks is 2035159451Srodrigc * something far enough zero that we aren't going thrash 2036159451Srodrigc * when we get near ENOSPC. 2037159451Srodrigc */ 2038159451Srodrigc#define XFS_ICSB_INO_CNTR_REENABLE 64 2039159451Srodrigc#define XFS_ICSB_FDBLK_CNTR_REENABLE 512 2040159451SrodrigcSTATIC void 2041159451Srodrigcxfs_icsb_balance_counter( 2042159451Srodrigc xfs_mount_t *mp, 2043159451Srodrigc xfs_sb_field_t field, 2044159451Srodrigc int flags) 2045159451Srodrigc{ 2046159451Srodrigc uint64_t count, resid = 0; 2047159451Srodrigc int weight = num_online_cpus(); 2048159451Srodrigc int s; 2049159451Srodrigc 2050159451Srodrigc if (!(flags & XFS_ICSB_SB_LOCKED)) 2051159451Srodrigc s = XFS_SB_LOCK(mp); 2052159451Srodrigc 2053159451Srodrigc /* disable counter and sync counter */ 2054159451Srodrigc xfs_icsb_disable_counter(mp, field); 2055159451Srodrigc 2056159451Srodrigc /* update counters - first CPU gets residual*/ 2057159451Srodrigc switch (field) { 2058159451Srodrigc case XFS_SBS_ICOUNT: 2059159451Srodrigc count = mp->m_sb.sb_icount; 2060159451Srodrigc resid = do_div(count, weight); 2061159451Srodrigc if (count < XFS_ICSB_INO_CNTR_REENABLE) 2062159451Srodrigc goto out; 2063159451Srodrigc break; 2064159451Srodrigc case XFS_SBS_IFREE: 2065159451Srodrigc count = mp->m_sb.sb_ifree; 2066159451Srodrigc resid = do_div(count, weight); 2067159451Srodrigc if (count < XFS_ICSB_INO_CNTR_REENABLE) 2068159451Srodrigc goto out; 2069159451Srodrigc 
break; 2070159451Srodrigc case XFS_SBS_FDBLOCKS: 2071159451Srodrigc count = mp->m_sb.sb_fdblocks; 2072159451Srodrigc resid = do_div(count, weight); 2073159451Srodrigc if (count < XFS_ICSB_FDBLK_CNTR_REENABLE) 2074159451Srodrigc goto out; 2075159451Srodrigc break; 2076159451Srodrigc default: 2077159451Srodrigc BUG(); 2078159451Srodrigc break; 2079153323Srodrigc } 2080153323Srodrigc 2081159451Srodrigc xfs_icsb_enable_counter(mp, field, count, resid); 2082159451Srodrigcout: 2083159451Srodrigc if (!(flags & XFS_ICSB_SB_LOCKED)) 2084159451Srodrigc XFS_SB_UNLOCK(mp, s); 2085153323Srodrigc} 2086153323Srodrigc 2087159451SrodrigcSTATIC int 2088159451Srodrigcxfs_icsb_modify_counters_int( 2089153323Srodrigc xfs_mount_t *mp, 2090159451Srodrigc xfs_sb_field_t field, 2091159451Srodrigc int delta, 2092159451Srodrigc int rsvd, 2093159451Srodrigc int flags) 2094153323Srodrigc{ 2095159451Srodrigc xfs_icsb_cnts_t *icsbp; 2096159451Srodrigc long long lcounter; /* long counter for 64 bit fields */ 2097159451Srodrigc int cpu, s, locked = 0; 2098159451Srodrigc int ret = 0, balance_done = 0; 2099153323Srodrigc 2100159451Srodrigcagain: 2101159451Srodrigc cpu = get_cpu(); 2102159451Srodrigc icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu), 2103159451Srodrigc xfs_icsb_lock_cntr(icsbp); 2104159451Srodrigc if (unlikely(xfs_icsb_counter_disabled(mp, field))) 2105159451Srodrigc goto slow_path; 2106153323Srodrigc 2107159451Srodrigc switch (field) { 2108159451Srodrigc case XFS_SBS_ICOUNT: 2109159451Srodrigc lcounter = icsbp->icsb_icount; 2110159451Srodrigc lcounter += delta; 2111159451Srodrigc if (unlikely(lcounter < 0)) 2112159451Srodrigc goto slow_path; 2113159451Srodrigc icsbp->icsb_icount = lcounter; 2114159451Srodrigc break; 2115159451Srodrigc 2116159451Srodrigc case XFS_SBS_IFREE: 2117159451Srodrigc lcounter = icsbp->icsb_ifree; 2118159451Srodrigc lcounter += delta; 2119159451Srodrigc if (unlikely(lcounter < 0)) 2120159451Srodrigc goto slow_path; 2121159451Srodrigc 
icsbp->icsb_ifree = lcounter; 2122159451Srodrigc break; 2123159451Srodrigc 2124159451Srodrigc case XFS_SBS_FDBLOCKS: 2125159451Srodrigc BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0); 2126159451Srodrigc 2127159451Srodrigc lcounter = icsbp->icsb_fdblocks; 2128159451Srodrigc lcounter += delta; 2129159451Srodrigc if (unlikely(lcounter < 0)) 2130159451Srodrigc goto slow_path; 2131159451Srodrigc icsbp->icsb_fdblocks = lcounter; 2132159451Srodrigc break; 2133159451Srodrigc default: 2134159451Srodrigc BUG(); 2135159451Srodrigc break; 2136153323Srodrigc } 2137159451Srodrigc xfs_icsb_unlock_cntr(icsbp); 2138159451Srodrigc put_cpu(); 2139159451Srodrigc if (locked) 2140159451Srodrigc XFS_SB_UNLOCK(mp, s); 2141159451Srodrigc return 0; 2142153323Srodrigc 2143159451Srodrigc /* 2144159451Srodrigc * The slow path needs to be run with the SBLOCK 2145159451Srodrigc * held so that we prevent other threads from 2146159451Srodrigc * attempting to run this path at the same time. 2147159451Srodrigc * this provides exclusion for the balancing code, 2148159451Srodrigc * and exclusive fallback if the balance does not 2149159451Srodrigc * provide enough resources to continue in an unlocked 2150159451Srodrigc * manner. 
2151159451Srodrigc */ 2152159451Srodrigcslow_path: 2153159451Srodrigc xfs_icsb_unlock_cntr(icsbp); 2154159451Srodrigc put_cpu(); 2155159451Srodrigc 2156159451Srodrigc /* need to hold superblock incase we need 2157159451Srodrigc * to disable a counter */ 2158159451Srodrigc if (!(flags & XFS_ICSB_SB_LOCKED)) { 2159159451Srodrigc s = XFS_SB_LOCK(mp); 2160159451Srodrigc locked = 1; 2161159451Srodrigc flags |= XFS_ICSB_SB_LOCKED; 2162159451Srodrigc } 2163159451Srodrigc if (!balance_done) { 2164159451Srodrigc xfs_icsb_balance_counter(mp, field, flags); 2165159451Srodrigc balance_done = 1; 2166159451Srodrigc goto again; 2167159451Srodrigc } else { 2168159451Srodrigc /* 2169159451Srodrigc * we might not have enough on this local 2170159451Srodrigc * cpu to allocate for a bulk request. 2171159451Srodrigc * We need to drain this field from all CPUs 2172159451Srodrigc * and disable the counter fastpath 2173159451Srodrigc */ 2174159451Srodrigc xfs_icsb_disable_counter(mp, field); 2175159451Srodrigc } 2176159451Srodrigc 2177159451Srodrigc ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); 2178159451Srodrigc 2179159451Srodrigc if (locked) 2180159451Srodrigc XFS_SB_UNLOCK(mp, s); 2181159451Srodrigc return ret; 2182153323Srodrigc} 2183153323Srodrigc 2184159451SrodrigcSTATIC int 2185159451Srodrigcxfs_icsb_modify_counters( 2186159451Srodrigc xfs_mount_t *mp, 2187159451Srodrigc xfs_sb_field_t field, 2188159451Srodrigc int delta, 2189159451Srodrigc int rsvd) 2190159451Srodrigc{ 2191159451Srodrigc return xfs_icsb_modify_counters_int(mp, field, delta, rsvd, 0); 2192159451Srodrigc} 2193159451Srodrigc 2194159451Srodrigc/* 2195159451Srodrigc * Called when superblock is already locked 2196159451Srodrigc */ 2197159451SrodrigcSTATIC int 2198159451Srodrigcxfs_icsb_modify_counters_locked( 2199159451Srodrigc xfs_mount_t *mp, 2200159451Srodrigc xfs_sb_field_t field, 2201159451Srodrigc int delta, 2202159451Srodrigc int rsvd) 2203159451Srodrigc{ 2204159451Srodrigc return 
xfs_icsb_modify_counters_int(mp, field, delta, 2205159451Srodrigc rsvd, XFS_ICSB_SB_LOCKED); 2206159451Srodrigc} 2207159451Srodrigc#endif 2208