1/* 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18#include "xfs.h" 19#include "xfs_fs.h" 20#include "xfs_types.h" 21#include "xfs_bit.h" 22#include "xfs_log.h" 23#include "xfs_inum.h" 24#include "xfs_trans.h" 25#include "xfs_sb.h" 26#include "xfs_ag.h" 27#include "xfs_dir.h" 28#include "xfs_dir2.h" 29#include "xfs_dmapi.h" 30#include "xfs_mount.h" 31#include "xfs_bmap_btree.h" 32#include "xfs_alloc_btree.h" 33#include "xfs_ialloc_btree.h" 34#include "xfs_dir_sf.h" 35#include "xfs_dir2_sf.h" 36#include "xfs_attr_sf.h" 37#include "xfs_dinode.h" 38#include "xfs_inode.h" 39#include "xfs_btree.h" 40#include "xfs_ialloc.h" 41#include "xfs_alloc.h" 42#include "xfs_rtalloc.h" 43#include "xfs_bmap.h" 44#include "xfs_error.h" 45#include "xfs_rw.h" 46#include "xfs_quota.h" 47#include "xfs_fsops.h" 48 49STATIC void xfs_mount_log_sbunit(xfs_mount_t *, __int64_t); 50STATIC int xfs_uuid_mount(xfs_mount_t *); 51STATIC void xfs_uuid_unmount(xfs_mount_t *mp); 52STATIC void xfs_unmountfs_wait(xfs_mount_t *); 53 54 55#ifdef HAVE_PERCPU_SB 56STATIC void xfs_icsb_destroy_counters(xfs_mount_t *); 57STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int); 58STATIC void xfs_icsb_sync_counters(xfs_mount_t *); 59STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, 60 int, int); 61STATIC int xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t, 62 int, int); 63STATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); 64 65#else 66 67#define xfs_icsb_destroy_counters(mp) do { } while (0) 68#define xfs_icsb_balance_counter(mp, a, b) do { } while (0) 69#define xfs_icsb_sync_counters(mp) do { } while (0) 70#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) 71#define xfs_icsb_modify_counters_locked(mp, a, b, c) do { } while (0) 72 73#endif 74 75static const struct { 76 short offset; 77 short type; /* 0 = integer 78 * 1 = binary / string (no translation) 79 */ 80} xfs_sb_info[] = { 81 { offsetof(xfs_sb_t, sb_magicnum), 0 }, 82 { offsetof(xfs_sb_t, sb_blocksize), 0 }, 83 { offsetof(xfs_sb_t, sb_dblocks), 0 }, 84 { offsetof(xfs_sb_t, sb_rblocks), 0 }, 85 { offsetof(xfs_sb_t, sb_rextents), 0 }, 86 { offsetof(xfs_sb_t, sb_uuid), 1 }, 87 { offsetof(xfs_sb_t, sb_logstart), 0 }, 88 { offsetof(xfs_sb_t, sb_rootino), 0 }, 89 { offsetof(xfs_sb_t, sb_rbmino), 0 }, 90 { offsetof(xfs_sb_t, sb_rsumino), 0 }, 91 { offsetof(xfs_sb_t, sb_rextsize), 0 }, 92 { offsetof(xfs_sb_t, sb_agblocks), 0 }, 93 { offsetof(xfs_sb_t, sb_agcount), 0 }, 94 { offsetof(xfs_sb_t, sb_rbmblocks), 0 }, 95 { offsetof(xfs_sb_t, sb_logblocks), 0 }, 96 { offsetof(xfs_sb_t, sb_versionnum), 0 }, 97 { offsetof(xfs_sb_t, sb_sectsize), 0 }, 98 { offsetof(xfs_sb_t, sb_inodesize), 0 }, 99 { offsetof(xfs_sb_t, sb_inopblock), 0 }, 100 { offsetof(xfs_sb_t, sb_fname[0]), 1 }, 101 { offsetof(xfs_sb_t, sb_blocklog), 0 }, 102 { offsetof(xfs_sb_t, sb_sectlog), 0 }, 103 { offsetof(xfs_sb_t, sb_inodelog), 0 }, 104 { offsetof(xfs_sb_t, sb_inopblog), 0 }, 105 { offsetof(xfs_sb_t, sb_agblklog), 0 }, 106 { offsetof(xfs_sb_t, sb_rextslog), 0 }, 107 { offsetof(xfs_sb_t, sb_inprogress), 0 }, 108 { offsetof(xfs_sb_t, sb_imax_pct), 0 }, 109 { offsetof(xfs_sb_t, sb_icount), 0 }, 110 { offsetof(xfs_sb_t, sb_ifree), 0 }, 111 { offsetof(xfs_sb_t, sb_fdblocks), 0 }, 112 { offsetof(xfs_sb_t, sb_frextents), 0 }, 113 { offsetof(xfs_sb_t, sb_uquotino), 0 }, 114 { offsetof(xfs_sb_t, sb_gquotino), 0 }, 115 { offsetof(xfs_sb_t, sb_qflags), 0 }, 116 { offsetof(xfs_sb_t, sb_flags), 0 }, 117 { offsetof(xfs_sb_t, sb_shared_vn), 0 }, 118 { offsetof(xfs_sb_t, sb_inoalignmt), 0 }, 119 { offsetof(xfs_sb_t, sb_unit), 0 }, 120 { offsetof(xfs_sb_t, sb_width), 0 }, 121 { offsetof(xfs_sb_t, sb_dirblklog), 0 }, 122 { offsetof(xfs_sb_t, sb_logsectlog), 0 }, 123 { offsetof(xfs_sb_t, sb_logsectsize),0 }, 124 { offsetof(xfs_sb_t, sb_logsunit), 0 }, 125 { offsetof(xfs_sb_t, sb_features2), 0 }, 126 { sizeof(xfs_sb_t), 0 } 127}; 128 129/* 130 * Return a pointer to an initialized xfs_mount structure. 131 */ 132xfs_mount_t * 133xfs_mount_init(void) 134{ 135 xfs_mount_t *mp; 136 137 mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP); 138 139 if (xfs_icsb_init_counters(mp)) { 140 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; 141 } 142 143 AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail"); 144 spinlock_init(&mp->m_sb_lock, "xfs_sb"); 145 /* FreeBSD specfic */ 146 sx_init(&mp->m_ilock, "xfs_mnt"); 147 initnsema(&mp->m_growlock, 1, "xfs_grow"); 148 /* 149 * Initialize the AIL. 150 */ 151 xfs_trans_ail_init(mp); 152 153 atomic_set(&mp->m_active_trans, 0); 154 155 return mp; 156} 157 158/* 159 * Free up the resources associated with a mount structure. Assume that 160 * the structure was initially zeroed, so we can tell which fields got 161 * initialized. 162 */ 163void 164xfs_mount_free( 165 xfs_mount_t *mp, 166 int remove_bhv) 167{ 168 if (mp->m_ihash) 169 xfs_ihash_free(mp); 170 if (mp->m_chash) 171 xfs_chash_free(mp); 172 173 if (mp->m_perag) { 174 int agno; 175 176 for (agno = 0; agno < mp->m_maxagi; agno++) 177 if (mp->m_perag[agno].pagb_list) 178 kmem_free(mp->m_perag[agno].pagb_list, 179 sizeof(xfs_perag_busy_t) * 180 XFS_PAGB_NUM_SLOTS); 181 kmem_free(mp->m_perag, 182 sizeof(xfs_perag_t) * mp->m_sb.sb_agcount); 183 } 184 185 AIL_LOCK_DESTROY(&mp->m_ail_lock); 186 spinlock_destroy(&mp->m_sb_lock); 187 /* FreeBSD specfic */ 188 sx_destroy(&mp->m_ilock); 189 freesema(&mp->m_growlock); 190 if (mp->m_quotainfo) 191 XFS_QM_DONE(mp); 192 193 if (mp->m_fsname != NULL) 194 kmem_free(mp->m_fsname, mp->m_fsname_len); 195 if (mp->m_rtname != NULL) 196 kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1); 197 if (mp->m_logname != NULL) 198 kmem_free(mp->m_logname, strlen(mp->m_logname) + 1); 199 200 if (remove_bhv) { 201 xfs_vfs_t *vfsp = XFS_MTOVFS(mp); 202 203 bhv_remove_all_vfsops(vfsp, 0); 204 VFS_REMOVEBHV(vfsp, &mp->m_bhv); 205 } 206 207 xfs_icsb_destroy_counters(mp); 208 kmem_free(mp, sizeof(xfs_mount_t)); 209} 210 211 212/* 213 * Check the validity of the SB found. 214 */ 215STATIC int 216xfs_mount_validate_sb( 217 xfs_mount_t *mp, 218 xfs_sb_t *sbp, 219 int flags) 220{ 221 /* 222 * If the log device and data device have the 223 * same device number, the log is internal. 224 * Consequently, the sb_logstart should be non-zero. If 225 * we have a zero sb_logstart in this case, we may be trying to mount 226 * a volume filesystem in a non-volume manner. 227 */ 228 if (sbp->sb_magicnum != XFS_SB_MAGIC) { 229 xfs_fs_mount_cmn_err(flags, "bad magic number"); 230 return XFS_ERROR(EWRONGFS); 231 } 232 233 if (!XFS_SB_GOOD_VERSION(sbp)) { 234 xfs_fs_mount_cmn_err(flags, "bad version"); 235 return XFS_ERROR(EWRONGFS); 236 } 237 238 if (unlikely( 239 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { 240 xfs_fs_mount_cmn_err(flags, 241 "filesystem is marked as having an external log; " 242 "specify logdev on the\nmount command line."); 243 return XFS_ERROR(EINVAL); 244 } 245 246 if (unlikely( 247 sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { 248 xfs_fs_mount_cmn_err(flags, 249 "filesystem is marked as having an internal log; " 250 "do not specify logdev on\nthe mount command line."); 251 return XFS_ERROR(EINVAL); 252 } 253 254 /* 255 * More sanity checking. These were stolen directly from 256 * xfs_repair. 257 */ 258 if (unlikely( 259 sbp->sb_agcount <= 0 || 260 sbp->sb_sectsize < XFS_MIN_SECTORSIZE || 261 sbp->sb_sectsize > XFS_MAX_SECTORSIZE || 262 sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || 263 sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || 264 sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || 265 sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || 266 sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || 267 sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || 268 sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || 269 sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || 270 sbp->sb_inodelog < XFS_DINODE_MIN_LOG || 271 sbp->sb_inodelog > XFS_DINODE_MAX_LOG || 272 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 273 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 274 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 275 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { 276 xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed"); 277 return XFS_ERROR(EFSCORRUPTED); 278 } 279 280 /* 281 * Sanity check AG count, size fields against data size field 282 */ 283 if (unlikely( 284 sbp->sb_dblocks == 0 || 285 sbp->sb_dblocks > 286 (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || 287 sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * 288 sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { 289 xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed"); 290 return XFS_ERROR(EFSCORRUPTED); 291 } 292 293 ASSERT(PAGE_SHIFT >= sbp->sb_blocklog); 294 ASSERT(sbp->sb_blocklog >= BBSHIFT); 295 296#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */ 297 if (unlikely( 298 (sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX || 299 (sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) { 300#else /* Limited by UINT_MAX of sectors */ 301 if (unlikely( 302 (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX || 303 (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) { 304#endif 305 xfs_fs_mount_cmn_err(flags, 306 "file system too large to be mounted on this system."); 307 return XFS_ERROR(E2BIG); 308 } 309 310 if (unlikely(sbp->sb_inprogress)) { 311 xfs_fs_mount_cmn_err(flags, "file system busy"); 312 return XFS_ERROR(EFSCORRUPTED); 313 } 314 315 /* 316 * Version 1 directory format has never worked on Linux. 317 */ 318 if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) { 319 xfs_fs_mount_cmn_err(flags, 320 "file system using version 1 directory format"); 321 return XFS_ERROR(ENOSYS); 322 } 323 324 /* 325 * Until this is fixed only page-sized or smaller data blocks work. 326 */ 327 if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { 328 xfs_fs_mount_cmn_err(flags, 329 "file system with blocksize %d bytes", 330 sbp->sb_blocksize); 331 xfs_fs_mount_cmn_err(flags, 332 "only pagesize (%ld) or less will currently work.", 333 PAGE_SIZE); 334 return XFS_ERROR(ENOSYS); 335 } 336 337 return 0; 338} 339 340xfs_agnumber_t 341xfs_initialize_perag( 342 struct xfs_vfs *vfs, 343 xfs_mount_t *mp, 344 xfs_agnumber_t agcount) 345{ 346 xfs_agnumber_t index, max_metadata; 347 xfs_perag_t *pag; 348 xfs_agino_t agino; 349 xfs_ino_t ino; 350 xfs_sb_t *sbp = &mp->m_sb; 351 xfs_ino_t max_inum = XFS_MAXINUMBER_32; 352 353 /* Check to see if the filesystem can overflow 32 bit inodes */ 354 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); 355 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); 356 357 /* Clear the mount flag if no inode can overflow 32 bits 358 * on this filesystem, or if specifically requested.. 359 */ 360 if ((vfs->vfs_flag & VFS_32BITINODES) && ino > max_inum) { 361 mp->m_flags |= XFS_MOUNT_32BITINODES; 362 } else { 363 mp->m_flags &= ~XFS_MOUNT_32BITINODES; 364 } 365 366 /* If we can overflow then setup the ag headers accordingly */ 367 if (mp->m_flags & XFS_MOUNT_32BITINODES) { 368 /* Calculate how much should be reserved for inodes to 369 * meet the max inode percentage. 370 */ 371 if (mp->m_maxicount) { 372 __uint64_t icount; 373 374 icount = sbp->sb_dblocks * sbp->sb_imax_pct; 375 do_div(icount, 100); 376 icount += sbp->sb_agblocks - 1; 377 do_div(icount, sbp->sb_agblocks); 378 max_metadata = icount; 379 } else { 380 max_metadata = agcount; 381 } 382 for (index = 0; index < agcount; index++) { 383 ino = XFS_AGINO_TO_INO(mp, index, agino); 384 if (ino > max_inum) { 385 index++; 386 break; 387 } 388 389 /* This ag is preferred for inodes */ 390 pag = &mp->m_perag[index]; 391 pag->pagi_inodeok = 1; 392 if (index < max_metadata) 393 pag->pagf_metadata = 1; 394 } 395 } else { 396 /* Setup default behavior for smaller filesystems */ 397 for (index = 0; index < agcount; index++) { 398 pag = &mp->m_perag[index]; 399 pag->pagi_inodeok = 1; 400 } 401 } 402 return index; 403} 404 405/* 406 * xfs_xlatesb 407 * 408 * data - on disk version of sb 409 * sb - a superblock 410 * dir - conversion direction: <0 - convert sb to buf 411 * >0 - convert buf to sb 412 * fields - which fields to copy (bitmask) 413 */ 414void 415xfs_xlatesb( 416 void *data, 417 xfs_sb_t *sb, 418 int dir, 419 __int64_t fields) 420{ 421 xfs_caddr_t buf_ptr; 422 xfs_caddr_t mem_ptr; 423 xfs_sb_field_t f; 424 int first; 425 int size; 426 427 ASSERT(dir); 428 ASSERT(fields); 429 430 if (!fields) 431 return; 432 433 buf_ptr = (xfs_caddr_t)data; 434 mem_ptr = (xfs_caddr_t)sb; 435 436 while (fields) { 437 f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); 438 first = xfs_sb_info[f].offset; 439 size = xfs_sb_info[f + 1].offset - first; 440 441 ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1); 442 443 if (size == 1 || xfs_sb_info[f].type == 1) { 444 if (dir > 0) { 445 memcpy(mem_ptr + first, buf_ptr + first, size); 446 } else { 447 memcpy(buf_ptr + first, mem_ptr + first, size); 448 } 449 } else { 450 switch (size) { 451 case 2: 452 INT_XLATE(*(__uint16_t*)(buf_ptr+first), 453 *(__uint16_t*)(mem_ptr+first), 454 dir, ARCH_CONVERT); 455 break; 456 case 4: 457 INT_XLATE(*(__uint32_t*)(buf_ptr+first), 458 *(__uint32_t*)(mem_ptr+first), 459 dir, ARCH_CONVERT); 460 break; 461 case 8: 462 INT_XLATE(*(__uint64_t*)(buf_ptr+first), 463 *(__uint64_t*)(mem_ptr+first), dir, ARCH_CONVERT); 464 break; 465 default: 466 ASSERT(0); 467 } 468 } 469 470 fields &= ~(1LL << f); 471 } 472} 473 474/* 475 * xfs_readsb 476 * 477 * Does the initial read of the superblock. 478 */ 479int 480xfs_readsb(xfs_mount_t *mp, int flags) 481{ 482 unsigned int sector_size; 483 unsigned int extra_flags; 484 xfs_buf_t *bp; 485 xfs_sb_t *sbp; 486 int error; 487 488 ASSERT(mp->m_sb_bp == NULL); 489 ASSERT(mp->m_ddev_targp != NULL); 490 491 /* 492 * Allocate a (locked) buffer to hold the superblock. 493 * This will be kept around at all times to optimize 494 * access to the superblock. 495 */ 496 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); 497 extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; 498 499 bp = xfs_getsb(mp,0); 500 501 if (!bp || XFS_BUF_ISERROR(bp)) { 502 xfs_fs_mount_cmn_err(flags, "SB read failed"); 503 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 504 goto fail; 505 } 506 ASSERT(XFS_BUF_ISBUSY(bp)); 507 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 508 509 /* 510 * Initialize the mount structure from the superblock. 511 * But first do some basic consistency checking. 512 */ 513 sbp = XFS_BUF_TO_SBP(bp); 514 xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS); 515 516 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); 517 if (error) { 518 xfs_fs_mount_cmn_err(flags, "SB validate failed"); 519 goto fail; 520 } 521 522 /* 523 * We must be able to do sector-sized and sector-aligned IO. 524 */ 525 if (sector_size > mp->m_sb.sb_sectsize) { 526 xfs_fs_mount_cmn_err(flags, 527 "device supports only %u byte sectors (not %u)", 528 sector_size, mp->m_sb.sb_sectsize); 529 error = ENOSYS; 530 goto fail; 531 } 532 533 /* 534 * If device sector size is smaller than the superblock size, 535 * re-read the superblock so the buffer is correctly sized. 536 */ 537 if (sector_size < mp->m_sb.sb_sectsize) { 538 XFS_BUF_UNMANAGE(bp); 539 xfs_buf_relse(bp); 540 sector_size = mp->m_sb.sb_sectsize; 541 bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, 542 BTOBB(sector_size), extra_flags); 543 if (!bp || XFS_BUF_ISERROR(bp)) { 544 xfs_fs_mount_cmn_err(flags, "SB re-read failed"); 545 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 546 goto fail; 547 } 548 ASSERT(XFS_BUF_ISBUSY(bp)); 549 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 550 } 551 552 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); 553 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); 554 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); 555 556 mp->m_sb_bp = bp; 557 xfs_buf_relse(bp); 558 ASSERT(XFS_BUF_VALUSEMA(bp) > 0); 559 return 0; 560 561 fail: 562 if (bp) { 563 XFS_BUF_UNMANAGE(bp); 564 xfs_buf_relse(bp); 565 } 566 return error; 567} 568 569 570/* 571 * xfs_mount_common 572 * 573 * Mount initialization code establishing various mount 574 * fields from the superblock associated with the given 575 * mount structure 576 */ 577STATIC void 578xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) 579{ 580 int i; 581 582 mp->m_agfrotor = mp->m_agirotor = 0; 583 spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock"); 584 mp->m_maxagi = mp->m_sb.sb_agcount; 585 mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; 586 mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; 587 mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; 588 mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; 589 mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; 590 mp->m_litino = sbp->sb_inodesize - 591 ((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t)); 592 mp->m_blockmask = sbp->sb_blocksize - 1; 593 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; 594 mp->m_blockwmask = mp->m_blockwsize - 1; 595#ifdef RMC 596 INIT_LIST_HEAD(&mp->m_del_inodes); 597#endif 598 TAILQ_INIT(&mp->m_del_inodes); 599 600 /* 601 * Setup for attributes, in case they get created. 602 * This value is for inodes getting attributes for the first time, 603 * the per-inode value is for old attribute values. 604 */ 605 ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048); 606 switch (sbp->sb_inodesize) { 607 case 256: 608 mp->m_attroffset = XFS_LITINO(mp) - 609 XFS_BMDR_SPACE_CALC(MINABTPTRS); 610 break; 611 case 512: 612 case 1024: 613 case 2048: 614 mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); 615 break; 616 default: 617 ASSERT(0); 618 } 619 ASSERT(mp->m_attroffset < XFS_LITINO(mp)); 620 621 for (i = 0; i < 2; i++) { 622 mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 623 xfs_alloc, i == 0); 624 mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 625 xfs_alloc, i == 0); 626 } 627 for (i = 0; i < 2; i++) { 628 mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 629 xfs_bmbt, i == 0); 630 mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 631 xfs_bmbt, i == 0); 632 } 633 for (i = 0; i < 2; i++) { 634 mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 635 xfs_inobt, i == 0); 636 mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 637 xfs_inobt, i == 0); 638 } 639 640 mp->m_bsize = XFS_FSB_TO_BB(mp, 1); 641 mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, 642 sbp->sb_inopblock); 643 mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; 644} 645/* 646 * xfs_mountfs 647 * 648 * This function does the following on an initial mount of a file system: 649 * - reads the superblock from disk and init the mount struct 650 * - if we're a 32-bit kernel, do a size check on the superblock 651 * so we don't mount terabyte filesystems 652 * - init mount struct realtime fields 653 * - allocate inode hash table for fs 654 * - init directory manager 655 * - perform recovery and init the log manager 656 */ 657int 658xfs_mountfs( 659 xfs_vfs_t *vfsp, 660 xfs_mount_t *mp, 661 int mfsi_flags) 662{ 663 xfs_buf_t *bp; 664 xfs_sb_t *sbp = &(mp->m_sb); 665 xfs_inode_t *rip; 666 xfs_vnode_t *rvp = NULL; 667 int readio_log, writeio_log; 668 xfs_daddr_t d; 669 __uint64_t ret64; 670 __int64_t update_flags; 671 uint quotamount, quotaflags; 672 int agno; 673 int uuid_mounted = 0; 674 int error = 0; 675 676 if (mp->m_sb_bp == NULL) { 677 if ((error = xfs_readsb(mp, mfsi_flags))) { 678 return error; 679 } 680 } 681 xfs_mount_common(mp, sbp); 682 683 /* 684 * Check if sb_agblocks is aligned at stripe boundary 685 * If sb_agblocks is NOT aligned turn off m_dalign since 686 * allocator alignment is within an ag, therefore ag has 687 * to be aligned at stripe boundary. 688 */ 689 update_flags = 0LL; 690 if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) { 691 /* 692 * If stripe unit and stripe width are not multiples 693 * of the fs blocksize turn off alignment. 694 */ 695 if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || 696 (BBTOB(mp->m_swidth) & mp->m_blockmask)) { 697 if (mp->m_flags & XFS_MOUNT_RETERR) { 698 cmn_err(CE_WARN, 699 "XFS: alignment check 1 failed"); 700 error = XFS_ERROR(EINVAL); 701 goto error1; 702 } 703 mp->m_dalign = mp->m_swidth = 0; 704 } else { 705 /* 706 * Convert the stripe unit and width to FSBs. 707 */ 708 mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); 709 if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { 710 if (mp->m_flags & XFS_MOUNT_RETERR) { 711 error = XFS_ERROR(EINVAL); 712 goto error1; 713 } 714 xfs_fs_cmn_err(CE_WARN, mp, 715"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)", 716 mp->m_dalign, mp->m_swidth, 717 sbp->sb_agblocks); 718 719 mp->m_dalign = 0; 720 mp->m_swidth = 0; 721 } else if (mp->m_dalign) { 722 mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); 723 } else { 724 if (mp->m_flags & XFS_MOUNT_RETERR) { 725 xfs_fs_cmn_err(CE_WARN, mp, 726"stripe alignment turned off: sunit(%d) less than bsize(%d)", 727 mp->m_dalign, 728 mp->m_blockmask +1); 729 error = XFS_ERROR(EINVAL); 730 goto error1; 731 } 732 mp->m_swidth = 0; 733 } 734 } 735 736 /* 737 * Update superblock with new values 738 * and log changes 739 */ 740 if (XFS_SB_VERSION_HASDALIGN(sbp)) { 741 if (sbp->sb_unit != mp->m_dalign) { 742 sbp->sb_unit = mp->m_dalign; 743 update_flags |= XFS_SB_UNIT; 744 } 745 if (sbp->sb_width != mp->m_swidth) { 746 sbp->sb_width = mp->m_swidth; 747 update_flags |= XFS_SB_WIDTH; 748 } 749 } 750 } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && 751 XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) { 752 mp->m_dalign = sbp->sb_unit; 753 mp->m_swidth = sbp->sb_width; 754 } 755 756 xfs_alloc_compute_maxlevels(mp); 757 xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); 758 xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); 759 xfs_ialloc_compute_maxlevels(mp); 760 761 if (sbp->sb_imax_pct) { 762 __uint64_t icount; 763 764 /* Make sure the maximum inode count is a multiple of the 765 * units we allocate inodes in. 766 */ 767 768 icount = sbp->sb_dblocks * sbp->sb_imax_pct; 769 do_div(icount, 100); 770 do_div(icount, mp->m_ialloc_blks); 771 mp->m_maxicount = (icount * mp->m_ialloc_blks) << 772 sbp->sb_inopblog; 773 } else 774 mp->m_maxicount = 0; 775 776 mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog); 777 778 /* 779 * XFS uses the uuid from the superblock as the unique 780 * identifier for fsid. We can not use the uuid from the volume 781 * since a single partition filesystem is identical to a single 782 * partition volume/filesystem. 783 */ 784 if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && 785 (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { 786 if (xfs_uuid_mount(mp)) { 787 error = XFS_ERROR(EINVAL); 788 goto error1; 789 } 790 uuid_mounted=1; 791 ret64 = uuid_hash64(&sbp->sb_uuid); 792 memcpy(&vfsp->vfs_fsid, &ret64, sizeof(ret64)); 793 } 794 795 /* 796 * Set the default minimum read and write sizes unless 797 * already specified in a mount option. 798 * We use smaller I/O sizes when the file system 799 * is being used for NFS service (wsync mount option). 800 */ 801 if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { 802 if (mp->m_flags & XFS_MOUNT_WSYNC) { 803 readio_log = XFS_WSYNC_READIO_LOG; 804 writeio_log = XFS_WSYNC_WRITEIO_LOG; 805 } else { 806 readio_log = XFS_READIO_LOG_LARGE; 807 writeio_log = XFS_WRITEIO_LOG_LARGE; 808 } 809 } else { 810 readio_log = mp->m_readio_log; 811 writeio_log = mp->m_writeio_log; 812 } 813 814 /* 815 * Set the number of readahead buffers to use based on 816 * physical memory size. 817 */ 818 if (xfs_physmem <= 4096) /* <= 16MB */ 819 mp->m_nreadaheads = XFS_RW_NREADAHEAD_16MB; 820 else if (xfs_physmem <= 8192) /* <= 32MB */ 821 mp->m_nreadaheads = XFS_RW_NREADAHEAD_32MB; 822 else 823 mp->m_nreadaheads = XFS_RW_NREADAHEAD_K32; 824 if (sbp->sb_blocklog > readio_log) { 825 mp->m_readio_log = sbp->sb_blocklog; 826 } else { 827 mp->m_readio_log = readio_log; 828 } 829 mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog); 830 if (sbp->sb_blocklog > writeio_log) { 831 mp->m_writeio_log = sbp->sb_blocklog; 832 } else { 833 mp->m_writeio_log = writeio_log; 834 } 835 mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog); 836 837 /* 838 * Set the inode cluster size based on the physical memory 839 * size. This may still be overridden by the file system 840 * block size if it is larger than the chosen cluster size. 841 */ 842 if (xfs_physmem <= btoc(32 * 1024 * 1024)) { /* <= 32 MB */ 843 mp->m_inode_cluster_size = XFS_INODE_SMALL_CLUSTER_SIZE; 844 } else { 845 mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; 846 } 847 /* 848 * Set whether we're using inode alignment. 849 */ 850 if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) && 851 mp->m_sb.sb_inoalignmt >= 852 XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) 853 mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; 854 else 855 mp->m_inoalign_mask = 0; 856 /* 857 * If we are using stripe alignment, check whether 858 * the stripe unit is a multiple of the inode alignment 859 */ 860 if (mp->m_dalign && mp->m_inoalign_mask && 861 !(mp->m_dalign & mp->m_inoalign_mask)) 862 mp->m_sinoalign = mp->m_dalign; 863 else 864 mp->m_sinoalign = 0; 865 /* 866 * Check that the data (and log if separate) are an ok size. 867 */ 868 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 869 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { 870 cmn_err(CE_WARN, "XFS: size check 1 failed"); 871 error = XFS_ERROR(E2BIG); 872 goto error1; 873 } 874 error = xfs_read_buf(mp, mp->m_ddev_targp, 875 d - XFS_FSS_TO_BB(mp, 1), 876 XFS_FSS_TO_BB(mp, 1), 0, &bp); 877 if (!error) { 878 xfs_buf_relse(bp); 879 } else { 880 cmn_err(CE_WARN, "XFS: size check 2 failed"); 881 if (error == ENOSPC) { 882 error = XFS_ERROR(E2BIG); 883 } 884 goto error1; 885 } 886 887 if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) && 888 mp->m_logdev_targp != mp->m_ddev_targp) { 889 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 890 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { 891 cmn_err(CE_WARN, "XFS: size check 3 failed"); 892 error = XFS_ERROR(E2BIG); 893 goto error1; 894 } 895 error = xfs_read_buf(mp, mp->m_logdev_targp, 896 d - XFS_FSB_TO_BB(mp, 1), 897 XFS_FSB_TO_BB(mp, 1), 0, &bp); 898 if (!error) { 899 xfs_buf_relse(bp); 900 } else { 901 cmn_err(CE_WARN, "XFS: size check 3 failed"); 902 if (error == ENOSPC) { 903 error = XFS_ERROR(E2BIG); 904 } 905 goto error1; 906 } 907 } 908 909 /* 910 * Initialize realtime fields in the mount structure 911 */ 912 if ((error = xfs_rtmount_init(mp))) { 913 cmn_err(CE_WARN, "XFS: RT mount failed"); 914 goto error1; 915 } 916 917 /* 918 * For client case we are done now 919 */ 920 if (mfsi_flags & XFS_MFSI_CLIENT) { 921 return 0; 922 } 923 924 /* 925 * Copies the low order bits of the timestamp and the randomly 926 * set "sequence" number out of a UUID. 927 */ 928 uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid); 929 930 /* 931 * The vfs structure needs to have a file system independent 932 * way of checking for the invariant file system ID. Since it 933 * can't look at mount structures it has a pointer to the data 934 * in the mount structure. 935 * 936 * File systems that don't support user level file handles (i.e. 937 * all of them except for XFS) will leave vfs_altfsid as NULL. 938 */ 939 vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid; 940 mp->m_dmevmask = 0; /* not persistent; set after each mount */ 941 942 /* 943 * Select the right directory manager. 944 */ 945 mp->m_dirops = 946 XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ? 947 xfsv2_dirops : 948 xfsv1_dirops; 949 950 /* 951 * Initialize directory manager's entries. 952 */ 953 XFS_DIR_MOUNT(mp); 954 955 /* 956 * Initialize the attribute manager's entries. 957 */ 958 mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100; 959 960 /* 961 * Initialize the precomputed transaction reservations values. 962 */ 963 xfs_trans_init(mp); 964 965 /* 966 * Allocate and initialize the inode hash table for this 967 * file system. 968 */ 969 xfs_ihash_init(mp); 970 xfs_chash_init(mp); 971 972 /* 973 * Allocate and initialize the per-ag data. 974 */ 975 init_rwsem(&mp->m_peraglock); 976 mp->m_perag = 977 kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP); 978 979 mp->m_maxagi = xfs_initialize_perag(vfsp, mp, sbp->sb_agcount); 980 981 /* 982 * log's mount-time initialization. Perform 1st part recovery if needed 983 */ 984 if (likely(sbp->sb_logblocks > 0)) { /* check for volume case */ 985 error = xfs_log_mount(mp, mp->m_logdev_targp, 986 XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), 987 XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); 988 if (error) { 989 cmn_err(CE_WARN, "XFS: log mount failed"); 990 goto error2; 991 } 992 } else { /* No log has been defined */ 993 cmn_err(CE_WARN, "XFS: no log defined"); 994 XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp); 995 error = XFS_ERROR(EFSCORRUPTED); 996 goto error2; 997 } 998 999 /* 1000 * Get and sanity-check the root inode. 1001 * Save the pointer to it in the mount structure. 1002 */ 1003 error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0); 1004 if (error) { 1005 cmn_err(CE_WARN, "XFS: failed to read root inode"); 1006 goto error3; 1007 } 1008 1009 ASSERT(rip != NULL); 1010 rvp = XFS_ITOV(rip); 1011 1012 if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { 1013 cmn_err(CE_WARN, "XFS: corrupted root inode"); 1014 printf("Root inode %p is not a directory: %llu", 1015 mp->m_ddev_targp, (unsigned long long)rip->i_ino); 1016 xfs_iunlock(rip, XFS_ILOCK_EXCL); 1017 XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, 1018 mp); 1019 error = XFS_ERROR(EFSCORRUPTED); 1020 goto error4; 1021 } 1022 mp->m_rootip = rip; /* save it */ 1023 1024 xfs_iunlock(rip, XFS_ILOCK_EXCL); 1025 1026 /* 1027 * Initialize realtime inode pointers in the mount structure 1028 */ 1029 if ((error = xfs_rtmount_inodes(mp))) { 1030 /* 1031 * Free up the root inode. 1032 */ 1033 cmn_err(CE_WARN, "XFS: failed to read RT inodes"); 1034 goto error4; 1035 } 1036 1037 /* 1038 * If fs is not mounted readonly, then update the superblock 1039 * unit and width changes. 1040 */ 1041 if (update_flags && !(vfsp->vfs_flag & VFS_RDONLY)) 1042 xfs_mount_log_sbunit(mp, update_flags); 1043 1044 /* 1045 * Initialise the XFS quota management subsystem for this mount 1046 */ 1047 if ((error = XFS_QM_INIT(mp, "amount, "aflags))) 1048 goto error4; 1049 1050 /* 1051 * Finish recovering the file system. This part needed to be 1052 * delayed until after the root and real-time bitmap inodes 1053 * were consistently read in. 1054 */ 1055 error = xfs_log_mount_finish(mp, mfsi_flags); 1056 if (error) { 1057 cmn_err(CE_WARN, "XFS: log mount finish failed"); 1058 goto error4; 1059 } 1060 1061 /* 1062 * Complete the quota initialisation, post-log-replay component. 1063 */ 1064 if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags))) 1065 goto error4; 1066 1067 return 0; 1068 1069 error4: 1070 /* 1071 * Free up the root inode. 1072 */ 1073 VN_RELE(rvp); 1074 error3: 1075 xfs_log_unmount_dealloc(mp); 1076 error2: 1077 xfs_ihash_free(mp); 1078 xfs_chash_free(mp); 1079 for (agno = 0; agno < sbp->sb_agcount; agno++) 1080 if (mp->m_perag[agno].pagb_list) 1081 kmem_free(mp->m_perag[agno].pagb_list, 1082 sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS); 1083 kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t)); 1084 mp->m_perag = NULL; 1085 /* FALLTHROUGH */ 1086 error1: 1087 if (uuid_mounted) 1088 xfs_uuid_unmount(mp); 1089 xfs_freesb(mp); 1090 return error; 1091} 1092 1093/* 1094 * xfs_unmountfs 1095 * 1096 * This flushes out the inodes,dquots and the superblock, unmounts the 1097 * log and makes sure that incore structures are freed. 1098 */ 1099int 1100xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) 1101{ 1102 struct xfs_vfs *vfsp = XFS_MTOVFS(mp); 1103#if defined(DEBUG) || defined(INDUCE_IO_ERROR) 1104 int64_t fsid; 1105#endif 1106 1107 xfs_iflush_all(mp); 1108 1109 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); 1110 1111 /* 1112 * Flush out the log synchronously so that we know for sure 1113 * that nothing is pinned. This is important because bflush() 1114 * will skip pinned buffers. 1115 */ 1116 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1117 1118 xfs_binval(mp->m_ddev_targp); 1119 if (mp->m_rtdev_targp) { 1120 xfs_binval(mp->m_rtdev_targp); 1121 } 1122 1123 xfs_unmountfs_writesb(mp); 1124 1125 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1126 1127 xfs_log_unmount(mp); /* Done! No more fs ops. */ 1128 1129 xfs_freesb(mp); 1130 1131 /* 1132 * All inodes from this mount point should be freed. 1133 */ 1134 //ASSERT(mp->m_inodes == NULL); 1135 if (mp->m_inodes != NULL ) { 1136 printf("WRONG: mp->m_ireclaims: %d\n", mp->m_ireclaims); 1137 printf("WRONG: mp->m_inodes: %p\n", mp->m_inodes); 1138 } 1139 1140 xfs_unmountfs_close(mp, cr); 1141 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) 1142 xfs_uuid_unmount(mp); 1143 1144#if defined(DEBUG) || defined(INDUCE_IO_ERROR) 1145 /* 1146 * clear all error tags on this filesystem 1147 */ 1148 memcpy(&fsid, &vfsp->vfs_fsid, sizeof(int64_t)); 1149 xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0); 1150#endif 1151 XFS_IODONE(vfsp); 1152 xfs_mount_free(mp, 1); 1153 return 0; 1154} 1155 1156void 1157xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr) 1158{ 1159 if (mp->m_logdev_targp != mp->m_ddev_targp) 1160 xfs_free_buftarg(mp->m_logdev_targp, 1); 1161 if (mp->m_rtdev_targp) 1162 xfs_free_buftarg(mp->m_rtdev_targp, 1); 1163 xfs_free_buftarg(mp->m_ddev_targp, 0); 1164} 1165 1166STATIC void 1167xfs_unmountfs_wait(xfs_mount_t *mp) 1168{ 1169 if (mp->m_logdev_targp != mp->m_ddev_targp) 1170 xfs_wait_buftarg(mp->m_logdev_targp); 1171 if (mp->m_rtdev_targp) 1172 xfs_wait_buftarg(mp->m_rtdev_targp); 1173 xfs_wait_buftarg(mp->m_ddev_targp); 1174} 1175 1176int 1177xfs_unmountfs_writesb(xfs_mount_t *mp) 1178{ 1179 xfs_buf_t *sbp; 1180 xfs_sb_t *sb; 1181 int error = 0; 1182 1183 /* 1184 * skip superblock write if fs is read-only, or 1185 * if we are doing a forced umount. 1186 */ 1187 sbp = xfs_getsb(mp, 0); 1188 if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY || 1189 XFS_FORCED_SHUTDOWN(mp))) { 1190 1191 xfs_icsb_sync_counters(mp); 1192 1193 /* 1194 * mark shared-readonly if desired 1195 */ 1196 sb = XFS_BUF_TO_SBP(sbp); 1197 if (mp->m_mk_sharedro) { 1198 if (!(sb->sb_flags & XFS_SBF_READONLY)) 1199 sb->sb_flags |= XFS_SBF_READONLY; 1200 if (!XFS_SB_VERSION_HASSHARED(sb)) 1201 XFS_SB_VERSION_ADDSHARED(sb); 1202 xfs_fs_cmn_err(CE_NOTE, mp, 1203 "Unmounting, marking shared read-only"); 1204 } 1205 XFS_BUF_UNDONE(sbp); 1206 XFS_BUF_UNREAD(sbp); 1207 XFS_BUF_UNDELAYWRITE(sbp); 1208 XFS_BUF_WRITE(sbp); 1209 XFS_BUF_UNASYNC(sbp); 1210 ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); 1211 xfsbdstrat(mp, sbp); 1212 /* Nevermind errors we might get here. */ 1213 error = xfs_iowait(sbp); 1214 if (error) 1215 xfs_ioerror_alert("xfs_unmountfs_writesb", 1216 mp, sbp, XFS_BUF_ADDR(sbp)); 1217 if (error && mp->m_mk_sharedro) 1218 xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly"); 1219 } 1220 xfs_buf_relse(sbp); 1221 return error; 1222} 1223 1224/* 1225 * xfs_mod_sb() can be used to copy arbitrary changes to the 1226 * in-core superblock into the superblock buffer to be logged. 1227 * It does not provide the higher level of locking that is 1228 * needed to protect the in-core superblock from concurrent 1229 * access. 1230 */ 1231void 1232xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) 1233{ 1234 xfs_buf_t *bp; 1235 int first; 1236 int last; 1237 xfs_mount_t *mp; 1238 xfs_sb_t *sbp; 1239 xfs_sb_field_t f; 1240 1241 ASSERT(fields); 1242 if (!fields) 1243 return; 1244 mp = tp->t_mountp; 1245 bp = xfs_trans_getsb(tp, mp, 0); 1246 sbp = XFS_BUF_TO_SBP(bp); 1247 first = sizeof(xfs_sb_t); 1248 last = 0; 1249 1250 /* translate/copy */ 1251 1252 xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), -1, fields); 1253 1254 /* find modified range */ 1255 1256 f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); 1257 ASSERT((1LL << f) & XFS_SB_MOD_BITS); 1258 first = xfs_sb_info[f].offset; 1259 1260 f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields); 1261 ASSERT((1LL << f) & XFS_SB_MOD_BITS); 1262 last = xfs_sb_info[f + 1].offset - 1; 1263 1264 xfs_trans_log_buf(tp, bp, first, last); 1265} 1266/* 1267 * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply 1268 * a delta to a specified field in the in-core superblock. Simply 1269 * switch on the field indicated and apply the delta to that field. 1270 * Fields are not allowed to dip below zero, so if the delta would 1271 * do this do not apply it and return EINVAL. 1272 * 1273 * The SB_LOCK must be held when this routine is called. 1274 */ 1275int 1276xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, 1277 int delta, int rsvd) 1278{ 1279 int scounter; /* short counter for 32 bit fields */ 1280 long long lcounter; /* long counter for 64 bit fields */ 1281 long long res_used, rem; 1282 1283 /* 1284 * With the in-core superblock spin lock held, switch 1285 * on the indicated field. Apply the delta to the 1286 * proper field. If the fields value would dip below 1287 * 0, then do not apply the delta and return EINVAL. 1288 */ 1289 switch (field) { 1290 case XFS_SBS_ICOUNT: 1291 lcounter = (long long)mp->m_sb.sb_icount; 1292 lcounter += delta; 1293 if (lcounter < 0) { 1294 ASSERT(0); 1295 return XFS_ERROR(EINVAL); 1296 } 1297 mp->m_sb.sb_icount = lcounter; 1298 return 0; 1299 case XFS_SBS_IFREE: 1300 lcounter = (long long)mp->m_sb.sb_ifree; 1301 lcounter += delta; 1302 if (lcounter < 0) { 1303 ASSERT(0); 1304 return XFS_ERROR(EINVAL); 1305 } 1306 mp->m_sb.sb_ifree = lcounter; 1307 return 0; 1308 case XFS_SBS_FDBLOCKS: 1309 1310 lcounter = (long long)mp->m_sb.sb_fdblocks; 1311 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); 1312 1313 if (delta > 0) { /* Putting blocks back */ 1314 if (res_used > delta) { 1315 mp->m_resblks_avail += delta; 1316 } else { 1317 rem = delta - res_used; 1318 mp->m_resblks_avail = mp->m_resblks; 1319 lcounter += rem; 1320 } 1321 } else { /* Taking blocks away */ 1322 1323 lcounter += delta; 1324 1325 /* 1326 * If were out of blocks, use any available reserved blocks if 1327 * were allowed to. 1328 */ 1329 1330 if (lcounter < 0) { 1331 if (rsvd) { 1332 lcounter = (long long)mp->m_resblks_avail + delta; 1333 if (lcounter < 0) { 1334 return XFS_ERROR(ENOSPC); 1335 } 1336 mp->m_resblks_avail = lcounter; 1337 return 0; 1338 } else { /* not reserved */ 1339 return XFS_ERROR(ENOSPC); 1340 } 1341 } 1342 } 1343 1344 mp->m_sb.sb_fdblocks = lcounter; 1345 return 0; 1346 case XFS_SBS_FREXTENTS: 1347 lcounter = (long long)mp->m_sb.sb_frextents; 1348 lcounter += delta; 1349 if (lcounter < 0) { 1350 return XFS_ERROR(ENOSPC); 1351 } 1352 mp->m_sb.sb_frextents = lcounter; 1353 return 0; 1354 case XFS_SBS_DBLOCKS: 1355 lcounter = (long long)mp->m_sb.sb_dblocks; 1356 lcounter += delta; 1357 if (lcounter < 0) { 1358 ASSERT(0); 1359 return XFS_ERROR(EINVAL); 1360 } 1361 mp->m_sb.sb_dblocks = lcounter; 1362 return 0; 1363 case XFS_SBS_AGCOUNT: 1364 scounter = mp->m_sb.sb_agcount; 1365 scounter += delta; 1366 if (scounter < 0) { 1367 ASSERT(0); 1368 return XFS_ERROR(EINVAL); 1369 } 1370 mp->m_sb.sb_agcount = scounter; 1371 return 0; 1372 case XFS_SBS_IMAX_PCT: 1373 scounter = mp->m_sb.sb_imax_pct; 1374 scounter += delta; 1375 if (scounter < 0) { 1376 ASSERT(0); 1377 return XFS_ERROR(EINVAL); 1378 } 1379 mp->m_sb.sb_imax_pct = scounter; 1380 return 0; 1381 case XFS_SBS_REXTSIZE: 1382 scounter = mp->m_sb.sb_rextsize; 1383 scounter += delta; 1384 if (scounter < 0) { 1385 ASSERT(0); 1386 return XFS_ERROR(EINVAL); 1387 } 1388 mp->m_sb.sb_rextsize = scounter; 1389 return 0; 1390 case XFS_SBS_RBMBLOCKS: 1391 scounter = mp->m_sb.sb_rbmblocks; 1392 scounter += delta; 1393 if (scounter < 0) { 1394 ASSERT(0); 1395 return XFS_ERROR(EINVAL); 1396 } 1397 mp->m_sb.sb_rbmblocks = scounter; 1398 return 0; 1399 case XFS_SBS_RBLOCKS: 1400 lcounter = (long long)mp->m_sb.sb_rblocks; 1401 lcounter += delta; 1402 if (lcounter < 0) { 1403 ASSERT(0); 1404 return XFS_ERROR(EINVAL); 1405 } 1406 mp->m_sb.sb_rblocks = lcounter; 1407 return 0; 1408 case XFS_SBS_REXTENTS: 1409 lcounter = (long long)mp->m_sb.sb_rextents; 1410 lcounter += delta; 1411 if (lcounter < 0) { 1412 ASSERT(0); 1413 return XFS_ERROR(EINVAL); 1414 } 1415 mp->m_sb.sb_rextents = lcounter; 1416 return 0; 1417 case XFS_SBS_REXTSLOG: 1418 scounter = mp->m_sb.sb_rextslog; 1419 scounter += delta; 1420 if (scounter < 0) { 1421 ASSERT(0); 1422 return XFS_ERROR(EINVAL); 1423 } 1424 mp->m_sb.sb_rextslog = scounter; 1425 return 0; 1426 default: 1427 ASSERT(0); 1428 return XFS_ERROR(EINVAL); 1429 } 1430} 1431 1432/* 1433 * xfs_mod_incore_sb() is used to change a field in the in-core 1434 * superblock structure by the specified delta. This modification 1435 * is protected by the SB_LOCK. Just use the xfs_mod_incore_sb_unlocked() 1436 * routine to do the work. 1437 */ 1438int 1439xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd) 1440{ 1441 unsigned long s; 1442 int status; 1443 1444 /* check for per-cpu counters */ 1445 switch (field) { 1446#ifdef HAVE_PERCPU_SB 1447 case XFS_SBS_ICOUNT: 1448 case XFS_SBS_IFREE: 1449 case XFS_SBS_FDBLOCKS: 1450 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { 1451 status = xfs_icsb_modify_counters(mp, field, 1452 delta, rsvd); 1453 break; 1454 } 1455 /* FALLTHROUGH */ 1456#endif 1457 default: 1458 s = XFS_SB_LOCK(mp); 1459 status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); 1460 XFS_SB_UNLOCK(mp, s); 1461 break; 1462 } 1463 1464 return status; 1465} 1466 1467/* 1468 * xfs_mod_incore_sb_batch() is used to change more than one field 1469 * in the in-core superblock structure at a time. This modification 1470 * is protected by a lock internal to this module. The fields and 1471 * changes to those fields are specified in the array of xfs_mod_sb 1472 * structures passed in. 1473 * 1474 * Either all of the specified deltas will be applied or none of 1475 * them will. If any modified field dips below 0, then all modifications 1476 * will be backed out and EINVAL will be returned. 1477 */ 1478int 1479xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) 1480{ 1481 unsigned long s; 1482 int status=0; 1483 xfs_mod_sb_t *msbp; 1484 1485 /* 1486 * Loop through the array of mod structures and apply each 1487 * individually. If any fail, then back out all those 1488 * which have already been applied. Do all of this within 1489 * the scope of the SB_LOCK so that all of the changes will 1490 * be atomic. 1491 */ 1492 s = XFS_SB_LOCK(mp); 1493 msbp = &msb[0]; 1494 for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { 1495 /* 1496 * Apply the delta at index n. If it fails, break 1497 * from the loop so we'll fall into the undo loop 1498 * below. 1499 */ 1500 switch (msbp->msb_field) { 1501#ifdef HAVE_PERCPU_SB 1502 case XFS_SBS_ICOUNT: 1503 case XFS_SBS_IFREE: 1504 case XFS_SBS_FDBLOCKS: 1505 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { 1506 status = xfs_icsb_modify_counters_locked(mp, 1507 msbp->msb_field, 1508 msbp->msb_delta, rsvd); 1509 break; 1510 } 1511 /* FALLTHROUGH */ 1512#endif 1513 default: 1514 status = xfs_mod_incore_sb_unlocked(mp, 1515 msbp->msb_field, 1516 msbp->msb_delta, rsvd); 1517 break; 1518 } 1519 1520 if (status != 0) { 1521 break; 1522 } 1523 } 1524 1525 /* 1526 * If we didn't complete the loop above, then back out 1527 * any changes made to the superblock. If you add code 1528 * between the loop above and here, make sure that you 1529 * preserve the value of status. Loop back until 1530 * we step below the beginning of the array. Make sure 1531 * we don't touch anything back there. 1532 */ 1533 if (status != 0) { 1534 msbp--; 1535 while (msbp >= msb) { 1536 switch (msbp->msb_field) { 1537#ifdef HAVE_PERCPU_SB 1538 case XFS_SBS_ICOUNT: 1539 case XFS_SBS_IFREE: 1540 case XFS_SBS_FDBLOCKS: 1541 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { 1542 status = 1543 xfs_icsb_modify_counters_locked(mp, 1544 msbp->msb_field, 1545 -(msbp->msb_delta), 1546 rsvd); 1547 break; 1548 } 1549 /* FALLTHROUGH */ 1550#endif 1551 default: 1552 status = xfs_mod_incore_sb_unlocked(mp, 1553 msbp->msb_field, 1554 -(msbp->msb_delta), 1555 rsvd); 1556 break; 1557 } 1558 ASSERT(status == 0); 1559 msbp--; 1560 } 1561 } 1562 XFS_SB_UNLOCK(mp, s); 1563 return status; 1564} 1565 1566/* 1567 * xfs_getsb() is called to obtain the buffer for the superblock. 1568 * The buffer is returned locked and read in from disk. 1569 * The buffer should be released with a call to xfs_brelse(). 1570 * 1571 * If the flags parameter is BUF_TRYLOCK, then we'll only return 1572 * the superblock buffer if it can be locked without sleeping. 1573 * If it can't then we'll return NULL. 1574 */ 1575xfs_buf_t * 1576xfs_getsb( 1577 xfs_mount_t *mp, 1578 int flags) 1579{ 1580 xfs_buf_t *bp; 1581 int extra_flags = 0; 1582 unsigned int sector_size; 1583 1584 1585 bp = mp->m_sb_bp; 1586 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); 1587#ifdef NOT 1588 /* MANAGED buf's appear broken in FreeBSD 1589 * but it's unclear if we need a persistant superblock? 1590 * since we now translate the ondisk superblock to 1591 * a separate translated structure and then translate that 1592 * structure back when we want to write the superblock 1593 */ 1594 extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; 1595 extra_flags = XFS_BUF_MANAGE; 1596#endif 1597 1598 mp->m_sb_bp = bp 1599 = xfs_buf_read_flags(mp->m_ddev_targp, 1600 XFS_SB_DADDR, 1601 BTOBB(sector_size), 1602 extra_flags); 1603 1604 XFS_BUF_HOLD(bp); 1605 ASSERT(XFS_BUF_ISDONE(bp)); 1606 if (!XFS_BUF_ISDONE(bp)){ 1607 printf("xfs_getsb: %p bp flags 0x%x\n",bp,bp->b_flags); 1608 } 1609 return bp; 1610} 1611 1612/* 1613 * Used to free the superblock along various error paths. 1614 */ 1615void 1616xfs_freesb( 1617 xfs_mount_t *mp) 1618{ 1619 xfs_buf_t *bp; 1620 1621 /* 1622 * Use xfs_getsb() so that the buffer will be locked 1623 * when we call xfs_buf_relse(). 1624 */ 1625 bp = xfs_getsb(mp, 0); 1626 XFS_BUF_UNMANAGE(bp); 1627 xfs_buf_relse(bp); 1628 mp->m_sb_bp = NULL; 1629} 1630 1631/* 1632 * See if the UUID is unique among mounted XFS filesystems. 1633 * Mount fails if UUID is nil or a FS with the same UUID is already mounted. 1634 */ 1635STATIC int 1636xfs_uuid_mount( 1637 xfs_mount_t *mp) 1638{ 1639 if (uuid_is_nil(&mp->m_sb.sb_uuid)) { 1640 cmn_err(CE_WARN, 1641 "XFS: Filesystem %s has nil UUID - can't mount", 1642 mp->m_fsname); 1643 return -1; 1644 } 1645 if (!uuid_table_insert(&mp->m_sb.sb_uuid)) { 1646 cmn_err(CE_WARN, 1647 "XFS: Filesystem %s has duplicate UUID - can't mount", 1648 mp->m_fsname); 1649 return -1; 1650 } 1651 return 0; 1652} 1653 1654/* 1655 * Remove filesystem from the UUID table. 1656 */ 1657STATIC void 1658xfs_uuid_unmount( 1659 xfs_mount_t *mp) 1660{ 1661 uuid_table_remove(&mp->m_sb.sb_uuid); 1662} 1663 1664/* 1665 * Used to log changes to the superblock unit and width fields which could 1666 * be altered by the mount options. Only the first superblock is updated. 1667 */ 1668STATIC void 1669xfs_mount_log_sbunit( 1670 xfs_mount_t *mp, 1671 __int64_t fields) 1672{ 1673 xfs_trans_t *tp; 1674 1675 ASSERT(fields & (XFS_SB_UNIT|XFS_SB_WIDTH|XFS_SB_UUID)); 1676 1677 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); 1678 if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1679 XFS_DEFAULT_LOG_COUNT)) { 1680 xfs_trans_cancel(tp, 0); 1681 return; 1682 } 1683 xfs_mod_sb(tp, fields); 1684 xfs_trans_commit(tp, 0, NULL); 1685} 1686 1687 1688#ifdef HAVE_PERCPU_SB 1689/* 1690 * Per-cpu incore superblock counters 1691 * 1692 * Simple concept, difficult implementation 1693 * 1694 * Basically, replace the incore superblock counters with a distributed per cpu 1695 * counter for contended fields (e.g. free block count). 1696 * 1697 * Difficulties arise in that the incore sb is used for ENOSPC checking, and 1698 * hence needs to be accurately read when we are running low on space. Hence 1699 * there is a method to enable and disable the per-cpu counters based on how 1700 * much "stuff" is available in them. 1701 * 1702 * Basically, a counter is enabled if there is enough free resource to justify 1703 * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local 1704 * ENOSPC), then we disable the counters to synchronise all callers and 1705 * re-distribute the available resources. 1706 * 1707 * If, once we redistributed the available resources, we still get a failure, 1708 * we disable the per-cpu counter and go through the slow path. 1709 * 1710 * The slow path is the current xfs_mod_incore_sb() function. This means that 1711 * when we disable a per-cpu counter, we need to drain it's resources back to 1712 * the global superblock. We do this after disabling the counter to prevent 1713 * more threads from queueing up on the counter. 1714 * 1715 * Essentially, this means that we still need a lock in the fast path to enable 1716 * synchronisation between the global counters and the per-cpu counters. This 1717 * is not a problem because the lock will be local to a CPU almost all the time 1718 * and have little contention except when we get to ENOSPC conditions. 1719 * 1720 * Basically, this lock becomes a barrier that enables us to lock out the fast 1721 * path while we do things like enabling and disabling counters and 1722 * synchronising the counters. 1723 * 1724 * Locking rules: 1725 * 1726 * 1. XFS_SB_LOCK() before picking up per-cpu locks 1727 * 2. per-cpu locks always picked up via for_each_online_cpu() order 1728 * 3. accurate counter sync requires XFS_SB_LOCK + per cpu locks 1729 * 4. modifying per-cpu counters requires holding per-cpu lock 1730 * 5. modifying global counters requires holding XFS_SB_LOCK 1731 * 6. enabling or disabling a counter requires holding the XFS_SB_LOCK 1732 * and _none_ of the per-cpu locks. 1733 * 1734 * Disabled counters are only ever re-enabled by a balance operation 1735 * that results in more free resources per CPU than a given threshold. 1736 * To ensure counters don't remain disabled, they are rebalanced when 1737 * the global resource goes above a higher threshold (i.e. some hysteresis 1738 * is present to prevent thrashing). 1739 */ 1740 1741/* 1742 * hot-plug CPU notifier support. 1743 * 1744 * We cannot use the hotcpu_register() function because it does 1745 * not allow notifier instances. We need a notifier per filesystem 1746 * as we need to be able to identify the filesystem to balance 1747 * the counters out. This is achieved by having a notifier block 1748 * embedded in the xfs_mount_t and doing pointer magic to get the 1749 * mount pointer from the notifier block address. 1750 */ 1751STATIC int 1752xfs_icsb_cpu_notify( 1753 struct notifier_block *nfb, 1754 unsigned long action, 1755 void *hcpu) 1756{ 1757 xfs_icsb_cnts_t *cntp; 1758 xfs_mount_t *mp; 1759 int s; 1760 1761 mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier); 1762 cntp = (xfs_icsb_cnts_t *) 1763 per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu); 1764 switch (action) { 1765 case CPU_UP_PREPARE: 1766 /* Easy Case - initialize the area and locks, and 1767 * then rebalance when online does everything else for us. */ 1768 memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 1769 break; 1770 case CPU_ONLINE: 1771 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); 1772 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); 1773 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); 1774 break; 1775 case CPU_DEAD: 1776 /* Disable all the counters, then fold the dead cpu's 1777 * count into the total on the global superblock and 1778 * re-enable the counters. */ 1779 s = XFS_SB_LOCK(mp); 1780 xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT); 1781 xfs_icsb_disable_counter(mp, XFS_SBS_IFREE); 1782 xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS); 1783 1784 mp->m_sb.sb_icount += cntp->icsb_icount; 1785 mp->m_sb.sb_ifree += cntp->icsb_ifree; 1786 mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks; 1787 1788 memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 1789 1790 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED); 1791 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED); 1792 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, XFS_ICSB_SB_LOCKED); 1793 XFS_SB_UNLOCK(mp, s); 1794 break; 1795 } 1796 1797 return NOTIFY_OK; 1798} 1799 1800int 1801xfs_icsb_init_counters( 1802 xfs_mount_t *mp) 1803{ 1804 xfs_icsb_cnts_t *cntp; 1805 int i; 1806 1807 mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t); 1808 if (mp->m_sb_cnts == NULL) 1809 return -ENOMEM; 1810 1811 mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify; 1812 mp->m_icsb_notifier.priority = 0; 1813 register_cpu_notifier(&mp->m_icsb_notifier); 1814 1815 for_each_online_cpu(i) { 1816 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 1817 memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 1818 } 1819 /* 1820 * start with all counters disabled so that the 1821 * initial balance kicks us off correctly 1822 */ 1823 mp->m_icsb_counters = -1; 1824 return 0; 1825} 1826 1827STATIC void 1828xfs_icsb_destroy_counters( 1829 xfs_mount_t *mp) 1830{ 1831 if (mp->m_sb_cnts) { 1832 unregister_cpu_notifier(&mp->m_icsb_notifier); 1833 free_percpu(mp->m_sb_cnts); 1834 } 1835} 1836 1837STATIC inline void 1838xfs_icsb_lock_cntr( 1839 xfs_icsb_cnts_t *icsbp) 1840{ 1841 while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) { 1842 ndelay(1000); 1843 } 1844} 1845 1846STATIC inline void 1847xfs_icsb_unlock_cntr( 1848 xfs_icsb_cnts_t *icsbp) 1849{ 1850 clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags); 1851} 1852 1853 1854STATIC inline void 1855xfs_icsb_lock_all_counters( 1856 xfs_mount_t *mp) 1857{ 1858 xfs_icsb_cnts_t *cntp; 1859 int i; 1860 1861 for_each_online_cpu(i) { 1862 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 1863 xfs_icsb_lock_cntr(cntp); 1864 } 1865} 1866 1867STATIC inline void 1868xfs_icsb_unlock_all_counters( 1869 xfs_mount_t *mp) 1870{ 1871 xfs_icsb_cnts_t *cntp; 1872 int i; 1873 1874 for_each_online_cpu(i) { 1875 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 1876 xfs_icsb_unlock_cntr(cntp); 1877 } 1878} 1879 1880STATIC void 1881xfs_icsb_count( 1882 xfs_mount_t *mp, 1883 xfs_icsb_cnts_t *cnt, 1884 int flags) 1885{ 1886 xfs_icsb_cnts_t *cntp; 1887 int i; 1888 1889 memset(cnt, 0, sizeof(xfs_icsb_cnts_t)); 1890 1891 if (!(flags & XFS_ICSB_LAZY_COUNT)) 1892 xfs_icsb_lock_all_counters(mp); 1893 1894 for_each_online_cpu(i) { 1895 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 1896 cnt->icsb_icount += cntp->icsb_icount; 1897 cnt->icsb_ifree += cntp->icsb_ifree; 1898 cnt->icsb_fdblocks += cntp->icsb_fdblocks; 1899 } 1900 1901 if (!(flags & XFS_ICSB_LAZY_COUNT)) 1902 xfs_icsb_unlock_all_counters(mp); 1903} 1904 1905STATIC int 1906xfs_icsb_counter_disabled( 1907 xfs_mount_t *mp, 1908 xfs_sb_field_t field) 1909{ 1910 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); 1911 return test_bit(field, &mp->m_icsb_counters); 1912} 1913 1914STATIC int 1915xfs_icsb_disable_counter( 1916 xfs_mount_t *mp, 1917 xfs_sb_field_t field) 1918{ 1919 xfs_icsb_cnts_t cnt; 1920 1921 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); 1922 1923 xfs_icsb_lock_all_counters(mp); 1924 if (!test_and_set_bit(field, &mp->m_icsb_counters)) { 1925 /* drain back to superblock */ 1926 1927 xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT); 1928 switch(field) { 1929 case XFS_SBS_ICOUNT: 1930 mp->m_sb.sb_icount = cnt.icsb_icount; 1931 break; 1932 case XFS_SBS_IFREE: 1933 mp->m_sb.sb_ifree = cnt.icsb_ifree; 1934 break; 1935 case XFS_SBS_FDBLOCKS: 1936 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; 1937 break; 1938 default: 1939 BUG(); 1940 } 1941 } 1942 1943 xfs_icsb_unlock_all_counters(mp); 1944 1945 return 0; 1946} 1947 1948STATIC void 1949xfs_icsb_enable_counter( 1950 xfs_mount_t *mp, 1951 xfs_sb_field_t field, 1952 uint64_t count, 1953 uint64_t resid) 1954{ 1955 xfs_icsb_cnts_t *cntp; 1956 int i; 1957 1958 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); 1959 1960 xfs_icsb_lock_all_counters(mp); 1961 for_each_online_cpu(i) { 1962 cntp = per_cpu_ptr(mp->m_sb_cnts, i); 1963 switch (field) { 1964 case XFS_SBS_ICOUNT: 1965 cntp->icsb_icount = count + resid; 1966 break; 1967 case XFS_SBS_IFREE: 1968 cntp->icsb_ifree = count + resid; 1969 break; 1970 case XFS_SBS_FDBLOCKS: 1971 cntp->icsb_fdblocks = count + resid; 1972 break; 1973 default: 1974 BUG(); 1975 break; 1976 } 1977 resid = 0; 1978 } 1979 clear_bit(field, &mp->m_icsb_counters); 1980 xfs_icsb_unlock_all_counters(mp); 1981} 1982 1983STATIC void 1984xfs_icsb_sync_counters_int( 1985 xfs_mount_t *mp, 1986 int flags) 1987{ 1988 xfs_icsb_cnts_t cnt; 1989 int s; 1990 1991 /* Pass 1: lock all counters */ 1992 if ((flags & XFS_ICSB_SB_LOCKED) == 0) 1993 s = XFS_SB_LOCK(mp); 1994 1995 xfs_icsb_count(mp, &cnt, flags); 1996 1997 /* Step 3: update mp->m_sb fields */ 1998 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT)) 1999 mp->m_sb.sb_icount = cnt.icsb_icount; 2000 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE)) 2001 mp->m_sb.sb_ifree = cnt.icsb_ifree; 2002 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS)) 2003 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; 2004 2005 if ((flags & XFS_ICSB_SB_LOCKED) == 0) 2006 XFS_SB_UNLOCK(mp, s); 2007} 2008 2009/* 2010 * Accurate update of per-cpu counters to incore superblock 2011 */ 2012STATIC void 2013xfs_icsb_sync_counters( 2014 xfs_mount_t *mp) 2015{ 2016 xfs_icsb_sync_counters_int(mp, 0); 2017} 2018 2019/* 2020 * lazy addition used for things like df, background sb syncs, etc 2021 */ 2022void 2023xfs_icsb_sync_counters_lazy( 2024 xfs_mount_t *mp) 2025{ 2026 xfs_icsb_sync_counters_int(mp, XFS_ICSB_LAZY_COUNT); 2027} 2028 2029/* 2030 * Balance and enable/disable counters as necessary. 2031 * 2032 * Thresholds for re-enabling counters are somewhat magic. 2033 * inode counts are chosen to be the same number as single 2034 * on disk allocation chunk per CPU, and free blocks is 2035 * something far enough zero that we aren't going thrash 2036 * when we get near ENOSPC. 2037 */ 2038#define XFS_ICSB_INO_CNTR_REENABLE 64 2039#define XFS_ICSB_FDBLK_CNTR_REENABLE 512 2040STATIC void 2041xfs_icsb_balance_counter( 2042 xfs_mount_t *mp, 2043 xfs_sb_field_t field, 2044 int flags) 2045{ 2046 uint64_t count, resid = 0; 2047 int weight = num_online_cpus(); 2048 int s; 2049 2050 if (!(flags & XFS_ICSB_SB_LOCKED)) 2051 s = XFS_SB_LOCK(mp); 2052 2053 /* disable counter and sync counter */ 2054 xfs_icsb_disable_counter(mp, field); 2055 2056 /* update counters - first CPU gets residual*/ 2057 switch (field) { 2058 case XFS_SBS_ICOUNT: 2059 count = mp->m_sb.sb_icount; 2060 resid = do_div(count, weight); 2061 if (count < XFS_ICSB_INO_CNTR_REENABLE) 2062 goto out; 2063 break; 2064 case XFS_SBS_IFREE: 2065 count = mp->m_sb.sb_ifree; 2066 resid = do_div(count, weight); 2067 if (count < XFS_ICSB_INO_CNTR_REENABLE) 2068 goto out; 2069 break; 2070 case XFS_SBS_FDBLOCKS: 2071 count = mp->m_sb.sb_fdblocks; 2072 resid = do_div(count, weight); 2073 if (count < XFS_ICSB_FDBLK_CNTR_REENABLE) 2074 goto out; 2075 break; 2076 default: 2077 BUG(); 2078 break; 2079 } 2080 2081 xfs_icsb_enable_counter(mp, field, count, resid); 2082out: 2083 if (!(flags & XFS_ICSB_SB_LOCKED)) 2084 XFS_SB_UNLOCK(mp, s); 2085} 2086 2087STATIC int 2088xfs_icsb_modify_counters_int( 2089 xfs_mount_t *mp, 2090 xfs_sb_field_t field, 2091 int delta, 2092 int rsvd, 2093 int flags) 2094{ 2095 xfs_icsb_cnts_t *icsbp; 2096 long long lcounter; /* long counter for 64 bit fields */ 2097 int cpu, s, locked = 0; 2098 int ret = 0, balance_done = 0; 2099 2100again: 2101 cpu = get_cpu(); 2102 icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu), 2103 xfs_icsb_lock_cntr(icsbp); 2104 if (unlikely(xfs_icsb_counter_disabled(mp, field))) 2105 goto slow_path; 2106 2107 switch (field) { 2108 case XFS_SBS_ICOUNT: 2109 lcounter = icsbp->icsb_icount; 2110 lcounter += delta; 2111 if (unlikely(lcounter < 0)) 2112 goto slow_path; 2113 icsbp->icsb_icount = lcounter; 2114 break; 2115 2116 case XFS_SBS_IFREE: 2117 lcounter = icsbp->icsb_ifree; 2118 lcounter += delta; 2119 if (unlikely(lcounter < 0)) 2120 goto slow_path; 2121 icsbp->icsb_ifree = lcounter; 2122 break; 2123 2124 case XFS_SBS_FDBLOCKS: 2125 BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0); 2126 2127 lcounter = icsbp->icsb_fdblocks; 2128 lcounter += delta; 2129 if (unlikely(lcounter < 0)) 2130 goto slow_path; 2131 icsbp->icsb_fdblocks = lcounter; 2132 break; 2133 default: 2134 BUG(); 2135 break; 2136 } 2137 xfs_icsb_unlock_cntr(icsbp); 2138 put_cpu(); 2139 if (locked) 2140 XFS_SB_UNLOCK(mp, s); 2141 return 0; 2142 2143 /* 2144 * The slow path needs to be run with the SBLOCK 2145 * held so that we prevent other threads from 2146 * attempting to run this path at the same time. 2147 * this provides exclusion for the balancing code, 2148 * and exclusive fallback if the balance does not 2149 * provide enough resources to continue in an unlocked 2150 * manner. 2151 */ 2152slow_path: 2153 xfs_icsb_unlock_cntr(icsbp); 2154 put_cpu(); 2155 2156 /* need to hold superblock incase we need 2157 * to disable a counter */ 2158 if (!(flags & XFS_ICSB_SB_LOCKED)) { 2159 s = XFS_SB_LOCK(mp); 2160 locked = 1; 2161 flags |= XFS_ICSB_SB_LOCKED; 2162 } 2163 if (!balance_done) { 2164 xfs_icsb_balance_counter(mp, field, flags); 2165 balance_done = 1; 2166 goto again; 2167 } else { 2168 /* 2169 * we might not have enough on this local 2170 * cpu to allocate for a bulk request. 2171 * We need to drain this field from all CPUs 2172 * and disable the counter fastpath 2173 */ 2174 xfs_icsb_disable_counter(mp, field); 2175 } 2176 2177 ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); 2178 2179 if (locked) 2180 XFS_SB_UNLOCK(mp, s); 2181 return ret; 2182} 2183 2184STATIC int 2185xfs_icsb_modify_counters( 2186 xfs_mount_t *mp, 2187 xfs_sb_field_t field, 2188 int delta, 2189 int rsvd) 2190{ 2191 return xfs_icsb_modify_counters_int(mp, field, delta, rsvd, 0); 2192} 2193 2194/* 2195 * Called when superblock is already locked 2196 */ 2197STATIC int 2198xfs_icsb_modify_counters_locked( 2199 xfs_mount_t *mp, 2200 xfs_sb_field_t field, 2201 int delta, 2202 int rsvd) 2203{ 2204 return xfs_icsb_modify_counters_int(mp, field, delta, 2205 rsvd, XFS_ICSB_SB_LOCKED); 2206} 2207#endif 2208