1153323Srodrigc/*
2159451Srodrigc * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3159451Srodrigc * All Rights Reserved.
4153323Srodrigc *
5159451Srodrigc * This program is free software; you can redistribute it and/or
6159451Srodrigc * modify it under the terms of the GNU General Public License as
7153323Srodrigc * published by the Free Software Foundation.
8153323Srodrigc *
9159451Srodrigc * This program is distributed in the hope that it would be useful,
10159451Srodrigc * but WITHOUT ANY WARRANTY; without even the implied warranty of
11159451Srodrigc * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12159451Srodrigc * GNU General Public License for more details.
13153323Srodrigc *
14159451Srodrigc * You should have received a copy of the GNU General Public License
15159451Srodrigc * along with this program; if not, write the Free Software Foundation,
16159451Srodrigc * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17153323Srodrigc */
18153323Srodrigc#include "xfs.h"
19159451Srodrigc#include "xfs_fs.h"
20153323Srodrigc#include "xfs_types.h"
21159451Srodrigc#include "xfs_bit.h"
22159451Srodrigc#include "xfs_log.h"
23153323Srodrigc#include "xfs_inum.h"
24153323Srodrigc#include "xfs_trans.h"
25153323Srodrigc#include "xfs_sb.h"
26153323Srodrigc#include "xfs_ag.h"
27153323Srodrigc#include "xfs_dir.h"
28153323Srodrigc#include "xfs_dir2.h"
29153323Srodrigc#include "xfs_dmapi.h"
30153323Srodrigc#include "xfs_mount.h"
31159451Srodrigc#include "xfs_bmap_btree.h"
32153323Srodrigc#include "xfs_alloc_btree.h"
33153323Srodrigc#include "xfs_ialloc_btree.h"
34153323Srodrigc#include "xfs_dir_sf.h"
35153323Srodrigc#include "xfs_dir2_sf.h"
36159451Srodrigc#include "xfs_attr_sf.h"
37153323Srodrigc#include "xfs_dinode.h"
38153323Srodrigc#include "xfs_inode.h"
39159451Srodrigc#include "xfs_btree.h"
40159451Srodrigc#include "xfs_ialloc.h"
41153323Srodrigc#include "xfs_alloc.h"
42153323Srodrigc#include "xfs_rtalloc.h"
43153323Srodrigc#include "xfs_bmap.h"
44153323Srodrigc#include "xfs_error.h"
45153323Srodrigc#include "xfs_rw.h"
46153323Srodrigc#include "xfs_quota.h"
47153323Srodrigc#include "xfs_fsops.h"
48153323Srodrigc
49153323SrodrigcSTATIC void	xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
50153323SrodrigcSTATIC int	xfs_uuid_mount(xfs_mount_t *);
51153323SrodrigcSTATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
52159451SrodrigcSTATIC void	xfs_unmountfs_wait(xfs_mount_t *);
53153323Srodrigc
54153323Srodrigc
/*
 * xfs_icsb_* helpers.
 *
 * When HAVE_PERCPU_SB is defined these are STATIC functions declared
 * here.  Otherwise the names below expand to empty statements so that
 * callers need no #ifdef of their own.
 *
 * NOTE: xfs_icsb_disable_counter is only declared in the HAVE_PERCPU_SB
 * case; no empty replacement is provided for it here.
 */
#ifdef HAVE_PERCPU_SB

STATIC void	xfs_icsb_destroy_counters(xfs_mount_t *mp);
STATIC void	xfs_icsb_balance_counter(xfs_mount_t *mp,
						xfs_sb_field_t field, int);
STATIC void	xfs_icsb_sync_counters(xfs_mount_t *mp);
STATIC int	xfs_icsb_modify_counters(xfs_mount_t *mp,
						xfs_sb_field_t field,
						int, int);
STATIC int	xfs_icsb_modify_counters_locked(xfs_mount_t *mp,
						xfs_sb_field_t field,
						int, int);
STATIC int	xfs_icsb_disable_counter(xfs_mount_t *mp,
						xfs_sb_field_t field);

#else	/* !HAVE_PERCPU_SB */

#define xfs_icsb_destroy_counters(mp)			do { } while (0)
#define xfs_icsb_balance_counter(mp, a, b)		do { } while (0)
#define xfs_icsb_sync_counters(mp)			do { } while (0)
#define xfs_icsb_modify_counters(mp, a, b, c)		do { } while (0)
#define xfs_icsb_modify_counters_locked(mp, a, b, c)	do { } while (0)

#endif	/* HAVE_PERCPU_SB */
74159451Srodrigc
75159451Srodrigcstatic const struct {
76159451Srodrigc	short offset;
77159451Srodrigc	short type;	/* 0 = integer
78159451Srodrigc			 * 1 = binary / string (no translation)
79159451Srodrigc			 */
80153323Srodrigc} xfs_sb_info[] = {
81153323Srodrigc    { offsetof(xfs_sb_t, sb_magicnum),   0 },
82153323Srodrigc    { offsetof(xfs_sb_t, sb_blocksize),  0 },
83153323Srodrigc    { offsetof(xfs_sb_t, sb_dblocks),    0 },
84153323Srodrigc    { offsetof(xfs_sb_t, sb_rblocks),    0 },
85153323Srodrigc    { offsetof(xfs_sb_t, sb_rextents),   0 },
86153323Srodrigc    { offsetof(xfs_sb_t, sb_uuid),       1 },
87153323Srodrigc    { offsetof(xfs_sb_t, sb_logstart),   0 },
88153323Srodrigc    { offsetof(xfs_sb_t, sb_rootino),    0 },
89153323Srodrigc    { offsetof(xfs_sb_t, sb_rbmino),     0 },
90153323Srodrigc    { offsetof(xfs_sb_t, sb_rsumino),    0 },
91153323Srodrigc    { offsetof(xfs_sb_t, sb_rextsize),   0 },
92153323Srodrigc    { offsetof(xfs_sb_t, sb_agblocks),   0 },
93153323Srodrigc    { offsetof(xfs_sb_t, sb_agcount),    0 },
94153323Srodrigc    { offsetof(xfs_sb_t, sb_rbmblocks),  0 },
95153323Srodrigc    { offsetof(xfs_sb_t, sb_logblocks),  0 },
96153323Srodrigc    { offsetof(xfs_sb_t, sb_versionnum), 0 },
97153323Srodrigc    { offsetof(xfs_sb_t, sb_sectsize),   0 },
98153323Srodrigc    { offsetof(xfs_sb_t, sb_inodesize),  0 },
99153323Srodrigc    { offsetof(xfs_sb_t, sb_inopblock),  0 },
100153323Srodrigc    { offsetof(xfs_sb_t, sb_fname[0]),   1 },
101153323Srodrigc    { offsetof(xfs_sb_t, sb_blocklog),   0 },
102153323Srodrigc    { offsetof(xfs_sb_t, sb_sectlog),    0 },
103153323Srodrigc    { offsetof(xfs_sb_t, sb_inodelog),   0 },
104153323Srodrigc    { offsetof(xfs_sb_t, sb_inopblog),   0 },
105153323Srodrigc    { offsetof(xfs_sb_t, sb_agblklog),   0 },
106153323Srodrigc    { offsetof(xfs_sb_t, sb_rextslog),   0 },
107153323Srodrigc    { offsetof(xfs_sb_t, sb_inprogress), 0 },
108153323Srodrigc    { offsetof(xfs_sb_t, sb_imax_pct),   0 },
109153323Srodrigc    { offsetof(xfs_sb_t, sb_icount),     0 },
110153323Srodrigc    { offsetof(xfs_sb_t, sb_ifree),      0 },
111153323Srodrigc    { offsetof(xfs_sb_t, sb_fdblocks),   0 },
112153323Srodrigc    { offsetof(xfs_sb_t, sb_frextents),  0 },
113153323Srodrigc    { offsetof(xfs_sb_t, sb_uquotino),   0 },
114153323Srodrigc    { offsetof(xfs_sb_t, sb_gquotino),   0 },
115153323Srodrigc    { offsetof(xfs_sb_t, sb_qflags),     0 },
116153323Srodrigc    { offsetof(xfs_sb_t, sb_flags),      0 },
117153323Srodrigc    { offsetof(xfs_sb_t, sb_shared_vn),  0 },
118153323Srodrigc    { offsetof(xfs_sb_t, sb_inoalignmt), 0 },
119153323Srodrigc    { offsetof(xfs_sb_t, sb_unit),	 0 },
120153323Srodrigc    { offsetof(xfs_sb_t, sb_width),	 0 },
121153323Srodrigc    { offsetof(xfs_sb_t, sb_dirblklog),	 0 },
122153323Srodrigc    { offsetof(xfs_sb_t, sb_logsectlog), 0 },
123153323Srodrigc    { offsetof(xfs_sb_t, sb_logsectsize),0 },
124153323Srodrigc    { offsetof(xfs_sb_t, sb_logsunit),	 0 },
125159451Srodrigc    { offsetof(xfs_sb_t, sb_features2),	 0 },
126153323Srodrigc    { sizeof(xfs_sb_t),			 0 }
127153323Srodrigc};
128153323Srodrigc
129153323Srodrigc/*
130153323Srodrigc * Return a pointer to an initialized xfs_mount structure.
131153323Srodrigc */
132153323Srodrigcxfs_mount_t *
133153323Srodrigcxfs_mount_init(void)
134153323Srodrigc{
135153323Srodrigc	xfs_mount_t *mp;
136153323Srodrigc
137159451Srodrigc	mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
138153323Srodrigc
139159451Srodrigc	if (xfs_icsb_init_counters(mp)) {
140159451Srodrigc		mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
141159451Srodrigc	}
142159451Srodrigc
143153323Srodrigc	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
144153323Srodrigc	spinlock_init(&mp->m_sb_lock, "xfs_sb");
145159451Srodrigc	/* FreeBSD specfic */
146159451Srodrigc	sx_init(&mp->m_ilock, "xfs_mnt");
147153323Srodrigc	initnsema(&mp->m_growlock, 1, "xfs_grow");
148153323Srodrigc	/*
149153323Srodrigc	 * Initialize the AIL.
150153323Srodrigc	 */
151153323Srodrigc	xfs_trans_ail_init(mp);
152153323Srodrigc
153153323Srodrigc	atomic_set(&mp->m_active_trans, 0);
154153323Srodrigc
155153323Srodrigc	return mp;
156153323Srodrigc}
157153323Srodrigc
158153323Srodrigc/*
159153323Srodrigc * Free up the resources associated with a mount structure.  Assume that
160153323Srodrigc * the structure was initially zeroed, so we can tell which fields got
161153323Srodrigc * initialized.
162153323Srodrigc */
163153323Srodrigcvoid
164153323Srodrigcxfs_mount_free(
165159451Srodrigc	xfs_mount_t	*mp,
166159451Srodrigc	int		remove_bhv)
167153323Srodrigc{
168153323Srodrigc	if (mp->m_ihash)
169153323Srodrigc		xfs_ihash_free(mp);
170153323Srodrigc	if (mp->m_chash)
171153323Srodrigc		xfs_chash_free(mp);
172153323Srodrigc
173153323Srodrigc	if (mp->m_perag) {
174153323Srodrigc		int	agno;
175153323Srodrigc
176159451Srodrigc		for (agno = 0; agno < mp->m_maxagi; agno++)
177153323Srodrigc			if (mp->m_perag[agno].pagb_list)
178153323Srodrigc				kmem_free(mp->m_perag[agno].pagb_list,
179153323Srodrigc						sizeof(xfs_perag_busy_t) *
180153323Srodrigc							XFS_PAGB_NUM_SLOTS);
181153323Srodrigc		kmem_free(mp->m_perag,
182153323Srodrigc			  sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
183153323Srodrigc	}
184153323Srodrigc
185153323Srodrigc	AIL_LOCK_DESTROY(&mp->m_ail_lock);
186153323Srodrigc	spinlock_destroy(&mp->m_sb_lock);
187159451Srodrigc	/* FreeBSD specfic */
188159451Srodrigc	sx_destroy(&mp->m_ilock);
189153323Srodrigc	freesema(&mp->m_growlock);
190153323Srodrigc	if (mp->m_quotainfo)
191153323Srodrigc		XFS_QM_DONE(mp);
192153323Srodrigc
193153323Srodrigc	if (mp->m_fsname != NULL)
194153323Srodrigc		kmem_free(mp->m_fsname, mp->m_fsname_len);
195159451Srodrigc	if (mp->m_rtname != NULL)
196159451Srodrigc		kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
197159451Srodrigc	if (mp->m_logname != NULL)
198159451Srodrigc		kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);
199153323Srodrigc
200153323Srodrigc	if (remove_bhv) {
201159451Srodrigc		xfs_vfs_t	*vfsp = XFS_MTOVFS(mp);
202153323Srodrigc
203153323Srodrigc		bhv_remove_all_vfsops(vfsp, 0);
204153323Srodrigc		VFS_REMOVEBHV(vfsp, &mp->m_bhv);
205153323Srodrigc	}
206153323Srodrigc
207159451Srodrigc	xfs_icsb_destroy_counters(mp);
208153323Srodrigc	kmem_free(mp, sizeof(xfs_mount_t));
209153323Srodrigc}
210153323Srodrigc
211153323Srodrigc
212153323Srodrigc/*
213153323Srodrigc * Check the validity of the SB found.
214153323Srodrigc */
215153323SrodrigcSTATIC int
216153323Srodrigcxfs_mount_validate_sb(
217153323Srodrigc	xfs_mount_t	*mp,
218159451Srodrigc	xfs_sb_t	*sbp,
219159451Srodrigc	int		flags)
220153323Srodrigc{
221153323Srodrigc	/*
222153323Srodrigc	 * If the log device and data device have the
223153323Srodrigc	 * same device number, the log is internal.
224153323Srodrigc	 * Consequently, the sb_logstart should be non-zero.  If
225153323Srodrigc	 * we have a zero sb_logstart in this case, we may be trying to mount
226153323Srodrigc	 * a volume filesystem in a non-volume manner.
227153323Srodrigc	 */
228153323Srodrigc	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
229159451Srodrigc		xfs_fs_mount_cmn_err(flags, "bad magic number");
230153323Srodrigc		return XFS_ERROR(EWRONGFS);
231153323Srodrigc	}
232153323Srodrigc
233153323Srodrigc	if (!XFS_SB_GOOD_VERSION(sbp)) {
234159451Srodrigc		xfs_fs_mount_cmn_err(flags, "bad version");
235153323Srodrigc		return XFS_ERROR(EWRONGFS);
236153323Srodrigc	}
237153323Srodrigc
238153323Srodrigc	if (unlikely(
239153323Srodrigc	    sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
240159451Srodrigc		xfs_fs_mount_cmn_err(flags,
241159451Srodrigc			"filesystem is marked as having an external log; "
242159451Srodrigc			"specify logdev on the\nmount command line.");
243159451Srodrigc		return XFS_ERROR(EINVAL);
244153323Srodrigc	}
245153323Srodrigc
246153323Srodrigc	if (unlikely(
247153323Srodrigc	    sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
248159451Srodrigc		xfs_fs_mount_cmn_err(flags,
249159451Srodrigc			"filesystem is marked as having an internal log; "
250159451Srodrigc			"do not specify logdev on\nthe mount command line.");
251159451Srodrigc		return XFS_ERROR(EINVAL);
252153323Srodrigc	}
253153323Srodrigc
254153323Srodrigc	/*
255153323Srodrigc	 * More sanity checking. These were stolen directly from
256153323Srodrigc	 * xfs_repair.
257153323Srodrigc	 */
258153323Srodrigc	if (unlikely(
259153323Srodrigc	    sbp->sb_agcount <= 0					||
260153323Srodrigc	    sbp->sb_sectsize < XFS_MIN_SECTORSIZE			||
261153323Srodrigc	    sbp->sb_sectsize > XFS_MAX_SECTORSIZE			||
262153323Srodrigc	    sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG			||
263153323Srodrigc	    sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG			||
264153323Srodrigc	    sbp->sb_blocksize < XFS_MIN_BLOCKSIZE			||
265153323Srodrigc	    sbp->sb_blocksize > XFS_MAX_BLOCKSIZE			||
266153323Srodrigc	    sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG			||
267153323Srodrigc	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG			||
268153323Srodrigc	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE			||
269153323Srodrigc	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE			||
270159451Srodrigc	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||
271159451Srodrigc	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||
272159451Srodrigc	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||
273153323Srodrigc	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
274153323Srodrigc	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)	||
275159451Srodrigc	    (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
276159451Srodrigc		xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed");
277153323Srodrigc		return XFS_ERROR(EFSCORRUPTED);
278153323Srodrigc	}
279153323Srodrigc
280153323Srodrigc	/*
281153323Srodrigc	 * Sanity check AG count, size fields against data size field
282153323Srodrigc	 */
283153323Srodrigc	if (unlikely(
284153323Srodrigc	    sbp->sb_dblocks == 0 ||
285153323Srodrigc	    sbp->sb_dblocks >
286153323Srodrigc	     (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
287153323Srodrigc	    sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
288153323Srodrigc			      sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
289159451Srodrigc		xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed");
290153323Srodrigc		return XFS_ERROR(EFSCORRUPTED);
291153323Srodrigc	}
292153323Srodrigc
293159451Srodrigc	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
294159451Srodrigc	ASSERT(sbp->sb_blocklog >= BBSHIFT);
295159451Srodrigc
296159451Srodrigc#if XFS_BIG_BLKNOS     /* Limited by ULONG_MAX of page cache index */
297153323Srodrigc	if (unlikely(
298159451Srodrigc	    (sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX ||
299159451Srodrigc	    (sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) {
300159451Srodrigc#else                  /* Limited by UINT_MAX of sectors */
301159451Srodrigc	if (unlikely(
302159451Srodrigc	    (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX ||
303159451Srodrigc	    (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) {
304159451Srodrigc#endif
305159451Srodrigc		xfs_fs_mount_cmn_err(flags,
306159451Srodrigc			"file system too large to be mounted on this system.");
307153323Srodrigc		return XFS_ERROR(E2BIG);
308153323Srodrigc	}
309153323Srodrigc
310153323Srodrigc	if (unlikely(sbp->sb_inprogress)) {
311159451Srodrigc		xfs_fs_mount_cmn_err(flags, "file system busy");
312153323Srodrigc		return XFS_ERROR(EFSCORRUPTED);
313153323Srodrigc	}
314153323Srodrigc
315153323Srodrigc	/*
316159451Srodrigc	 * Version 1 directory format has never worked on Linux.
317159451Srodrigc	 */
318159451Srodrigc	if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) {
319159451Srodrigc		xfs_fs_mount_cmn_err(flags,
320159451Srodrigc			"file system using version 1 directory format");
321159451Srodrigc		return XFS_ERROR(ENOSYS);
322159451Srodrigc	}
323159451Srodrigc
324159451Srodrigc	/*
325153323Srodrigc	 * Until this is fixed only page-sized or smaller data blocks work.
326153323Srodrigc	 */
327153323Srodrigc	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
328159451Srodrigc		xfs_fs_mount_cmn_err(flags,
329159451Srodrigc			"file system with blocksize %d bytes",
330153323Srodrigc			sbp->sb_blocksize);
331159451Srodrigc		xfs_fs_mount_cmn_err(flags,
332159451Srodrigc			"only pagesize (%ld) or less will currently work.",
333153323Srodrigc			PAGE_SIZE);
334153323Srodrigc		return XFS_ERROR(ENOSYS);
335153323Srodrigc	}
336153323Srodrigc
337153323Srodrigc	return 0;
338153323Srodrigc}
339153323Srodrigc
340159451Srodrigcxfs_agnumber_t
341159451Srodrigcxfs_initialize_perag(
342159451Srodrigc	struct xfs_vfs	*vfs,
343159451Srodrigc	xfs_mount_t	*mp,
344159451Srodrigc	xfs_agnumber_t	agcount)
345153323Srodrigc{
346159451Srodrigc	xfs_agnumber_t	index, max_metadata;
347153323Srodrigc	xfs_perag_t	*pag;
348153323Srodrigc	xfs_agino_t	agino;
349153323Srodrigc	xfs_ino_t	ino;
350153323Srodrigc	xfs_sb_t	*sbp = &mp->m_sb;
351153323Srodrigc	xfs_ino_t	max_inum = XFS_MAXINUMBER_32;
352153323Srodrigc
353153323Srodrigc	/* Check to see if the filesystem can overflow 32 bit inodes */
354153323Srodrigc	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
355153323Srodrigc	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
356153323Srodrigc
357153323Srodrigc	/* Clear the mount flag if no inode can overflow 32 bits
358153323Srodrigc	 * on this filesystem, or if specifically requested..
359153323Srodrigc	 */
360159451Srodrigc	if ((vfs->vfs_flag & VFS_32BITINODES) && ino > max_inum) {
361153323Srodrigc		mp->m_flags |= XFS_MOUNT_32BITINODES;
362153323Srodrigc	} else {
363153323Srodrigc		mp->m_flags &= ~XFS_MOUNT_32BITINODES;
364153323Srodrigc	}
365153323Srodrigc
366153323Srodrigc	/* If we can overflow then setup the ag headers accordingly */
367153323Srodrigc	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
368153323Srodrigc		/* Calculate how much should be reserved for inodes to
369153323Srodrigc		 * meet the max inode percentage.
370153323Srodrigc		 */
371153323Srodrigc		if (mp->m_maxicount) {
372153323Srodrigc			__uint64_t	icount;
373153323Srodrigc
374153323Srodrigc			icount = sbp->sb_dblocks * sbp->sb_imax_pct;
375153323Srodrigc			do_div(icount, 100);
376153323Srodrigc			icount += sbp->sb_agblocks - 1;
377159451Srodrigc			do_div(icount, sbp->sb_agblocks);
378153323Srodrigc			max_metadata = icount;
379153323Srodrigc		} else {
380153323Srodrigc			max_metadata = agcount;
381153323Srodrigc		}
382153323Srodrigc		for (index = 0; index < agcount; index++) {
383153323Srodrigc			ino = XFS_AGINO_TO_INO(mp, index, agino);
384153323Srodrigc			if (ino > max_inum) {
385153323Srodrigc				index++;
386153323Srodrigc				break;
387153323Srodrigc			}
388153323Srodrigc
389159451Srodrigc			/* This ag is preferred for inodes */
390153323Srodrigc			pag = &mp->m_perag[index];
391153323Srodrigc			pag->pagi_inodeok = 1;
392153323Srodrigc			if (index < max_metadata)
393153323Srodrigc				pag->pagf_metadata = 1;
394153323Srodrigc		}
395153323Srodrigc	} else {
396153323Srodrigc		/* Setup default behavior for smaller filesystems */
397153323Srodrigc		for (index = 0; index < agcount; index++) {
398153323Srodrigc			pag = &mp->m_perag[index];
399153323Srodrigc			pag->pagi_inodeok = 1;
400153323Srodrigc		}
401153323Srodrigc	}
402159451Srodrigc	return index;
403153323Srodrigc}
404153323Srodrigc
405153323Srodrigc/*
406153323Srodrigc * xfs_xlatesb
407153323Srodrigc *
408153323Srodrigc *     data       - on disk version of sb
409153323Srodrigc *     sb         - a superblock
410153323Srodrigc *     dir        - conversion direction: <0 - convert sb to buf
411153323Srodrigc *                                        >0 - convert buf to sb
412153323Srodrigc *     fields     - which fields to copy (bitmask)
413153323Srodrigc */
414153323Srodrigcvoid
415153323Srodrigcxfs_xlatesb(
416153323Srodrigc	void		*data,
417153323Srodrigc	xfs_sb_t	*sb,
418153323Srodrigc	int		dir,
419153323Srodrigc	__int64_t	fields)
420153323Srodrigc{
421153323Srodrigc	xfs_caddr_t	buf_ptr;
422153323Srodrigc	xfs_caddr_t	mem_ptr;
423153323Srodrigc	xfs_sb_field_t	f;
424153323Srodrigc	int		first;
425153323Srodrigc	int		size;
426153323Srodrigc
427153323Srodrigc	ASSERT(dir);
428153323Srodrigc	ASSERT(fields);
429153323Srodrigc
430153323Srodrigc	if (!fields)
431153323Srodrigc		return;
432153323Srodrigc
433153323Srodrigc	buf_ptr = (xfs_caddr_t)data;
434153323Srodrigc	mem_ptr = (xfs_caddr_t)sb;
435153323Srodrigc
436153323Srodrigc	while (fields) {
437153323Srodrigc		f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
438153323Srodrigc		first = xfs_sb_info[f].offset;
439153323Srodrigc		size = xfs_sb_info[f + 1].offset - first;
440153323Srodrigc
441153323Srodrigc		ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1);
442153323Srodrigc
443159451Srodrigc		if (size == 1 || xfs_sb_info[f].type == 1) {
444153323Srodrigc			if (dir > 0) {
445153323Srodrigc				memcpy(mem_ptr + first, buf_ptr + first, size);
446153323Srodrigc			} else {
447153323Srodrigc				memcpy(buf_ptr + first, mem_ptr + first, size);
448153323Srodrigc			}
449153323Srodrigc		} else {
450153323Srodrigc			switch (size) {
451153323Srodrigc			case 2:
452153323Srodrigc				INT_XLATE(*(__uint16_t*)(buf_ptr+first),
453153323Srodrigc					  *(__uint16_t*)(mem_ptr+first),
454159451Srodrigc					  dir, ARCH_CONVERT);
455153323Srodrigc				break;
456153323Srodrigc			case 4:
457153323Srodrigc				INT_XLATE(*(__uint32_t*)(buf_ptr+first),
458153323Srodrigc					  *(__uint32_t*)(mem_ptr+first),
459159451Srodrigc					  dir, ARCH_CONVERT);
460153323Srodrigc				break;
461153323Srodrigc			case 8:
462153323Srodrigc				INT_XLATE(*(__uint64_t*)(buf_ptr+first),
463159451Srodrigc					  *(__uint64_t*)(mem_ptr+first), dir, ARCH_CONVERT);
464153323Srodrigc				break;
465153323Srodrigc			default:
466153323Srodrigc				ASSERT(0);
467153323Srodrigc			}
468153323Srodrigc		}
469153323Srodrigc
470153323Srodrigc		fields &= ~(1LL << f);
471153323Srodrigc	}
472153323Srodrigc}
473153323Srodrigc
474153323Srodrigc/*
475153323Srodrigc * xfs_readsb
476153323Srodrigc *
477153323Srodrigc * Does the initial read of the superblock.
478153323Srodrigc */
479153323Srodrigcint
480159451Srodrigcxfs_readsb(xfs_mount_t *mp, int flags)
481153323Srodrigc{
482153323Srodrigc	unsigned int	sector_size;
483153323Srodrigc	unsigned int	extra_flags;
484153323Srodrigc	xfs_buf_t	*bp;
485153323Srodrigc	xfs_sb_t	*sbp;
486153323Srodrigc	int		error;
487153323Srodrigc
488153323Srodrigc	ASSERT(mp->m_sb_bp == NULL);
489153323Srodrigc	ASSERT(mp->m_ddev_targp != NULL);
490153323Srodrigc
491153323Srodrigc	/*
492153323Srodrigc	 * Allocate a (locked) buffer to hold the superblock.
493153323Srodrigc	 * This will be kept around at all times to optimize
494153323Srodrigc	 * access to the superblock.
495153323Srodrigc	 */
496153323Srodrigc	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
497159451Srodrigc        extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED;
498153323Srodrigc
499159451Srodrigc	bp = xfs_getsb(mp,0);
500159451Srodrigc
501153323Srodrigc	if (!bp || XFS_BUF_ISERROR(bp)) {
502159451Srodrigc		xfs_fs_mount_cmn_err(flags, "SB read failed");
503153323Srodrigc		error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
504153323Srodrigc		goto fail;
505153323Srodrigc	}
506153323Srodrigc	ASSERT(XFS_BUF_ISBUSY(bp));
507153323Srodrigc	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
508153323Srodrigc
509153323Srodrigc	/*
510153323Srodrigc	 * Initialize the mount structure from the superblock.
511153323Srodrigc	 * But first do some basic consistency checking.
512153323Srodrigc	 */
513153323Srodrigc	sbp = XFS_BUF_TO_SBP(bp);
514159451Srodrigc	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS);
515153323Srodrigc
516159451Srodrigc	error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
517153323Srodrigc	if (error) {
518159451Srodrigc		xfs_fs_mount_cmn_err(flags, "SB validate failed");
519153323Srodrigc		goto fail;
520153323Srodrigc	}
521153323Srodrigc
522153323Srodrigc	/*
523153323Srodrigc	 * We must be able to do sector-sized and sector-aligned IO.
524153323Srodrigc	 */
525153323Srodrigc	if (sector_size > mp->m_sb.sb_sectsize) {
526159451Srodrigc		xfs_fs_mount_cmn_err(flags,
527159451Srodrigc			"device supports only %u byte sectors (not %u)",
528153323Srodrigc			sector_size, mp->m_sb.sb_sectsize);
529153323Srodrigc		error = ENOSYS;
530153323Srodrigc		goto fail;
531153323Srodrigc	}
532153323Srodrigc
533153323Srodrigc	/*
534153323Srodrigc	 * If device sector size is smaller than the superblock size,
535153323Srodrigc	 * re-read the superblock so the buffer is correctly sized.
536153323Srodrigc	 */
537153323Srodrigc	if (sector_size < mp->m_sb.sb_sectsize) {
538153323Srodrigc		XFS_BUF_UNMANAGE(bp);
539153323Srodrigc		xfs_buf_relse(bp);
540153323Srodrigc		sector_size = mp->m_sb.sb_sectsize;
541153323Srodrigc		bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
542153323Srodrigc					BTOBB(sector_size), extra_flags);
543153323Srodrigc		if (!bp || XFS_BUF_ISERROR(bp)) {
544159451Srodrigc			xfs_fs_mount_cmn_err(flags, "SB re-read failed");
545153323Srodrigc			error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
546153323Srodrigc			goto fail;
547153323Srodrigc		}
548153323Srodrigc		ASSERT(XFS_BUF_ISBUSY(bp));
549153323Srodrigc		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
550153323Srodrigc	}
551153323Srodrigc
552159451Srodrigc	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
553159451Srodrigc	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
554159451Srodrigc	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
555159451Srodrigc
556153323Srodrigc	mp->m_sb_bp = bp;
557153323Srodrigc	xfs_buf_relse(bp);
558153323Srodrigc	ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
559153323Srodrigc	return 0;
560153323Srodrigc
561153323Srodrigc fail:
562153323Srodrigc	if (bp) {
563153323Srodrigc		XFS_BUF_UNMANAGE(bp);
564153323Srodrigc		xfs_buf_relse(bp);
565153323Srodrigc	}
566153323Srodrigc	return error;
567153323Srodrigc}
568153323Srodrigc
569153323Srodrigc
570153323Srodrigc/*
571153323Srodrigc * xfs_mount_common
572153323Srodrigc *
573153323Srodrigc * Mount initialization code establishing various mount
574153323Srodrigc * fields from the superblock associated with the given
575153323Srodrigc * mount structure
576153323Srodrigc */
577153323SrodrigcSTATIC void
578153323Srodrigcxfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
579153323Srodrigc{
580153323Srodrigc	int	i;
581153323Srodrigc
582153323Srodrigc	mp->m_agfrotor = mp->m_agirotor = 0;
583159451Srodrigc	spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock");
584153323Srodrigc	mp->m_maxagi = mp->m_sb.sb_agcount;
585153323Srodrigc	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
586153323Srodrigc	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
587153323Srodrigc	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
588153323Srodrigc	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
589153323Srodrigc	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
590153323Srodrigc	mp->m_litino = sbp->sb_inodesize -
591153323Srodrigc		((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
592153323Srodrigc	mp->m_blockmask = sbp->sb_blocksize - 1;
593153323Srodrigc	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
594153323Srodrigc	mp->m_blockwmask = mp->m_blockwsize - 1;
595159451Srodrigc#ifdef RMC
596159451Srodrigc	INIT_LIST_HEAD(&mp->m_del_inodes);
597159451Srodrigc#endif
598153323Srodrigc	TAILQ_INIT(&mp->m_del_inodes);
599153323Srodrigc
600153323Srodrigc	/*
601153323Srodrigc	 * Setup for attributes, in case they get created.
602153323Srodrigc	 * This value is for inodes getting attributes for the first time,
603153323Srodrigc	 * the per-inode value is for old attribute values.
604153323Srodrigc	 */
605153323Srodrigc	ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
606153323Srodrigc	switch (sbp->sb_inodesize) {
607153323Srodrigc	case 256:
608159451Srodrigc		mp->m_attroffset = XFS_LITINO(mp) -
609159451Srodrigc				   XFS_BMDR_SPACE_CALC(MINABTPTRS);
610153323Srodrigc		break;
611153323Srodrigc	case 512:
612153323Srodrigc	case 1024:
613153323Srodrigc	case 2048:
614159451Srodrigc		mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
615153323Srodrigc		break;
616153323Srodrigc	default:
617153323Srodrigc		ASSERT(0);
618153323Srodrigc	}
619153323Srodrigc	ASSERT(mp->m_attroffset < XFS_LITINO(mp));
620153323Srodrigc
621153323Srodrigc	for (i = 0; i < 2; i++) {
622153323Srodrigc		mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
623153323Srodrigc			xfs_alloc, i == 0);
624153323Srodrigc		mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
625153323Srodrigc			xfs_alloc, i == 0);
626153323Srodrigc	}
627153323Srodrigc	for (i = 0; i < 2; i++) {
628153323Srodrigc		mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
629153323Srodrigc			xfs_bmbt, i == 0);
630153323Srodrigc		mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
631153323Srodrigc			xfs_bmbt, i == 0);
632153323Srodrigc	}
633153323Srodrigc	for (i = 0; i < 2; i++) {
634153323Srodrigc		mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
635153323Srodrigc			xfs_inobt, i == 0);
636153323Srodrigc		mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
637153323Srodrigc			xfs_inobt, i == 0);
638153323Srodrigc	}
639153323Srodrigc
640153323Srodrigc	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
641153323Srodrigc	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
642153323Srodrigc					sbp->sb_inopblock);
643153323Srodrigc	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
644153323Srodrigc}
645153323Srodrigc/*
646153323Srodrigc * xfs_mountfs
647153323Srodrigc *
648153323Srodrigc * This function does the following on an initial mount of a file system:
649153323Srodrigc *	- reads the superblock from disk and init the mount struct
650153323Srodrigc *	- if we're a 32-bit kernel, do a size check on the superblock
651153323Srodrigc *		so we don't mount terabyte filesystems
652153323Srodrigc *	- init mount struct realtime fields
653153323Srodrigc *	- allocate inode hash table for fs
654153323Srodrigc *	- init directory manager
655153323Srodrigc *	- perform recovery and init the log manager
656153323Srodrigc */
657153323Srodrigcint
658153323Srodrigcxfs_mountfs(
659153323Srodrigc	xfs_vfs_t	*vfsp,
660153323Srodrigc	xfs_mount_t	*mp,
661153323Srodrigc	int		mfsi_flags)
662153323Srodrigc{
663153323Srodrigc	xfs_buf_t	*bp;
664153323Srodrigc	xfs_sb_t	*sbp = &(mp->m_sb);
665153323Srodrigc	xfs_inode_t	*rip;
666159451Srodrigc	xfs_vnode_t	*rvp = NULL;
667153323Srodrigc	int		readio_log, writeio_log;
668153323Srodrigc	xfs_daddr_t	d;
669153323Srodrigc	__uint64_t	ret64;
670153323Srodrigc	__int64_t	update_flags;
671153323Srodrigc	uint		quotamount, quotaflags;
672153323Srodrigc	int		agno;
673153323Srodrigc	int		uuid_mounted = 0;
674153323Srodrigc	int		error = 0;
675153323Srodrigc
676153323Srodrigc	if (mp->m_sb_bp == NULL) {
677159451Srodrigc		if ((error = xfs_readsb(mp, mfsi_flags))) {
678159451Srodrigc			return error;
679153323Srodrigc		}
680153323Srodrigc	}
681153323Srodrigc	xfs_mount_common(mp, sbp);
682153323Srodrigc
683153323Srodrigc	/*
684153323Srodrigc	 * Check if sb_agblocks is aligned at stripe boundary
685153323Srodrigc	 * If sb_agblocks is NOT aligned turn off m_dalign since
686153323Srodrigc	 * allocator alignment is within an ag, therefore ag has
687153323Srodrigc	 * to be aligned at stripe boundary.
688153323Srodrigc	 */
689153323Srodrigc	update_flags = 0LL;
690153323Srodrigc	if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) {
691153323Srodrigc		/*
692153323Srodrigc		 * If stripe unit and stripe width are not multiples
693153323Srodrigc		 * of the fs blocksize turn off alignment.
694153323Srodrigc		 */
695153323Srodrigc		if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
696153323Srodrigc		    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
697153323Srodrigc			if (mp->m_flags & XFS_MOUNT_RETERR) {
698153323Srodrigc				cmn_err(CE_WARN,
699153323Srodrigc					"XFS: alignment check 1 failed");
700153323Srodrigc				error = XFS_ERROR(EINVAL);
701153323Srodrigc				goto error1;
702153323Srodrigc			}
703153323Srodrigc			mp->m_dalign = mp->m_swidth = 0;
704153323Srodrigc		} else {
705153323Srodrigc			/*
706153323Srodrigc			 * Convert the stripe unit and width to FSBs.
707153323Srodrigc			 */
708153323Srodrigc			mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
709153323Srodrigc			if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
710153323Srodrigc				if (mp->m_flags & XFS_MOUNT_RETERR) {
711153323Srodrigc					error = XFS_ERROR(EINVAL);
712153323Srodrigc					goto error1;
713153323Srodrigc				}
714159451Srodrigc				xfs_fs_cmn_err(CE_WARN, mp,
715159451Srodrigc"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)",
716159451Srodrigc					mp->m_dalign, mp->m_swidth,
717159451Srodrigc					sbp->sb_agblocks);
718159451Srodrigc
719153323Srodrigc				mp->m_dalign = 0;
720153323Srodrigc				mp->m_swidth = 0;
721153323Srodrigc			} else if (mp->m_dalign) {
722153323Srodrigc				mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
723153323Srodrigc			} else {
724153323Srodrigc				if (mp->m_flags & XFS_MOUNT_RETERR) {
725159451Srodrigc					xfs_fs_cmn_err(CE_WARN, mp,
726159451Srodrigc"stripe alignment turned off: sunit(%d) less than bsize(%d)",
727159451Srodrigc                                        	mp->m_dalign,
728159451Srodrigc						mp->m_blockmask +1);
729153323Srodrigc					error = XFS_ERROR(EINVAL);
730153323Srodrigc					goto error1;
731153323Srodrigc				}
732153323Srodrigc				mp->m_swidth = 0;
733153323Srodrigc			}
734153323Srodrigc		}
735153323Srodrigc
736153323Srodrigc		/*
737153323Srodrigc		 * Update superblock with new values
738153323Srodrigc		 * and log changes
739153323Srodrigc		 */
740153323Srodrigc		if (XFS_SB_VERSION_HASDALIGN(sbp)) {
741153323Srodrigc			if (sbp->sb_unit != mp->m_dalign) {
742153323Srodrigc				sbp->sb_unit = mp->m_dalign;
743153323Srodrigc				update_flags |= XFS_SB_UNIT;
744153323Srodrigc			}
745153323Srodrigc			if (sbp->sb_width != mp->m_swidth) {
746153323Srodrigc				sbp->sb_width = mp->m_swidth;
747153323Srodrigc				update_flags |= XFS_SB_WIDTH;
748153323Srodrigc			}
749153323Srodrigc		}
750153323Srodrigc	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
751153323Srodrigc		    XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) {
752153323Srodrigc			mp->m_dalign = sbp->sb_unit;
753153323Srodrigc			mp->m_swidth = sbp->sb_width;
754153323Srodrigc	}
755153323Srodrigc
756153323Srodrigc	xfs_alloc_compute_maxlevels(mp);
757153323Srodrigc	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
758153323Srodrigc	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
759153323Srodrigc	xfs_ialloc_compute_maxlevels(mp);
760153323Srodrigc
761153323Srodrigc	if (sbp->sb_imax_pct) {
762153323Srodrigc		__uint64_t	icount;
763153323Srodrigc
764153323Srodrigc		/* Make sure the maximum inode count is a multiple of the
765153323Srodrigc		 * units we allocate inodes in.
766153323Srodrigc		 */
767153323Srodrigc
768153323Srodrigc		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
769153323Srodrigc		do_div(icount, 100);
770153323Srodrigc		do_div(icount, mp->m_ialloc_blks);
771153323Srodrigc		mp->m_maxicount = (icount * mp->m_ialloc_blks)  <<
772153323Srodrigc				   sbp->sb_inopblog;
773153323Srodrigc	} else
774153323Srodrigc		mp->m_maxicount = 0;
775153323Srodrigc
776153323Srodrigc	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);
777153323Srodrigc
778153323Srodrigc	/*
779153323Srodrigc	 * XFS uses the uuid from the superblock as the unique
780153323Srodrigc	 * identifier for fsid.  We can not use the uuid from the volume
781153323Srodrigc	 * since a single partition filesystem is identical to a single
782153323Srodrigc	 * partition volume/filesystem.
783153323Srodrigc	 */
784153323Srodrigc	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
785153323Srodrigc	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
786153323Srodrigc		if (xfs_uuid_mount(mp)) {
787153323Srodrigc			error = XFS_ERROR(EINVAL);
788153323Srodrigc			goto error1;
789153323Srodrigc		}
790153323Srodrigc		uuid_mounted=1;
791153323Srodrigc		ret64 = uuid_hash64(&sbp->sb_uuid);
792153323Srodrigc		memcpy(&vfsp->vfs_fsid, &ret64, sizeof(ret64));
793153323Srodrigc	}
794153323Srodrigc
795153323Srodrigc	/*
796153323Srodrigc	 * Set the default minimum read and write sizes unless
797153323Srodrigc	 * already specified in a mount option.
798153323Srodrigc	 * We use smaller I/O sizes when the file system
799153323Srodrigc	 * is being used for NFS service (wsync mount option).
800153323Srodrigc	 */
801153323Srodrigc	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
802153323Srodrigc		if (mp->m_flags & XFS_MOUNT_WSYNC) {
803153323Srodrigc			readio_log = XFS_WSYNC_READIO_LOG;
804153323Srodrigc			writeio_log = XFS_WSYNC_WRITEIO_LOG;
805153323Srodrigc		} else {
806153323Srodrigc			readio_log = XFS_READIO_LOG_LARGE;
807153323Srodrigc			writeio_log = XFS_WRITEIO_LOG_LARGE;
808153323Srodrigc		}
809153323Srodrigc	} else {
810153323Srodrigc		readio_log = mp->m_readio_log;
811153323Srodrigc		writeio_log = mp->m_writeio_log;
812153323Srodrigc	}
813153323Srodrigc
814153323Srodrigc	/*
815153323Srodrigc	 * Set the number of readahead buffers to use based on
816153323Srodrigc	 * physical memory size.
817153323Srodrigc	 */
818153323Srodrigc	if (xfs_physmem <= 4096)		/* <= 16MB */
819153323Srodrigc		mp->m_nreadaheads = XFS_RW_NREADAHEAD_16MB;
820153323Srodrigc	else if (xfs_physmem <= 8192)	/* <= 32MB */
821153323Srodrigc		mp->m_nreadaheads = XFS_RW_NREADAHEAD_32MB;
822153323Srodrigc	else
823153323Srodrigc		mp->m_nreadaheads = XFS_RW_NREADAHEAD_K32;
824153323Srodrigc	if (sbp->sb_blocklog > readio_log) {
825153323Srodrigc		mp->m_readio_log = sbp->sb_blocklog;
826153323Srodrigc	} else {
827153323Srodrigc		mp->m_readio_log = readio_log;
828153323Srodrigc	}
829153323Srodrigc	mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
830153323Srodrigc	if (sbp->sb_blocklog > writeio_log) {
831153323Srodrigc		mp->m_writeio_log = sbp->sb_blocklog;
832153323Srodrigc	} else {
833153323Srodrigc		mp->m_writeio_log = writeio_log;
834153323Srodrigc	}
835153323Srodrigc	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
836153323Srodrigc
837153323Srodrigc	/*
838153323Srodrigc	 * Set the inode cluster size based on the physical memory
839153323Srodrigc	 * size.  This may still be overridden by the file system
840153323Srodrigc	 * block size if it is larger than the chosen cluster size.
841153323Srodrigc	 */
842153323Srodrigc	if (xfs_physmem <= btoc(32 * 1024 * 1024)) { /* <= 32 MB */
843153323Srodrigc		mp->m_inode_cluster_size = XFS_INODE_SMALL_CLUSTER_SIZE;
844153323Srodrigc	} else {
845153323Srodrigc		mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
846153323Srodrigc	}
847153323Srodrigc	/*
848153323Srodrigc	 * Set whether we're using inode alignment.
849153323Srodrigc	 */
850153323Srodrigc	if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
851153323Srodrigc	    mp->m_sb.sb_inoalignmt >=
852153323Srodrigc	    XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
853153323Srodrigc		mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
854153323Srodrigc	else
855153323Srodrigc		mp->m_inoalign_mask = 0;
856153323Srodrigc	/*
857153323Srodrigc	 * If we are using stripe alignment, check whether
858153323Srodrigc	 * the stripe unit is a multiple of the inode alignment
859153323Srodrigc	 */
860153323Srodrigc	if (mp->m_dalign && mp->m_inoalign_mask &&
861153323Srodrigc	    !(mp->m_dalign & mp->m_inoalign_mask))
862153323Srodrigc		mp->m_sinoalign = mp->m_dalign;
863153323Srodrigc	else
864153323Srodrigc		mp->m_sinoalign = 0;
865153323Srodrigc	/*
866153323Srodrigc	 * Check that the data (and log if separate) are an ok size.
867153323Srodrigc	 */
868153323Srodrigc	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
869153323Srodrigc	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
870153323Srodrigc		cmn_err(CE_WARN, "XFS: size check 1 failed");
871153323Srodrigc		error = XFS_ERROR(E2BIG);
872153323Srodrigc		goto error1;
873153323Srodrigc	}
874153323Srodrigc	error = xfs_read_buf(mp, mp->m_ddev_targp,
875153323Srodrigc			     d - XFS_FSS_TO_BB(mp, 1),
876153323Srodrigc			     XFS_FSS_TO_BB(mp, 1), 0, &bp);
877153323Srodrigc	if (!error) {
878153323Srodrigc		xfs_buf_relse(bp);
879153323Srodrigc	} else {
880153323Srodrigc		cmn_err(CE_WARN, "XFS: size check 2 failed");
881153323Srodrigc		if (error == ENOSPC) {
882153323Srodrigc			error = XFS_ERROR(E2BIG);
883153323Srodrigc		}
884153323Srodrigc		goto error1;
885153323Srodrigc	}
886153323Srodrigc
887153323Srodrigc	if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) &&
888153323Srodrigc	    mp->m_logdev_targp != mp->m_ddev_targp) {
889153323Srodrigc		d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
890153323Srodrigc		if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
891153323Srodrigc			cmn_err(CE_WARN, "XFS: size check 3 failed");
892153323Srodrigc			error = XFS_ERROR(E2BIG);
893153323Srodrigc			goto error1;
894153323Srodrigc		}
895153323Srodrigc		error = xfs_read_buf(mp, mp->m_logdev_targp,
896153323Srodrigc				     d - XFS_FSB_TO_BB(mp, 1),
897153323Srodrigc				     XFS_FSB_TO_BB(mp, 1), 0, &bp);
898153323Srodrigc		if (!error) {
899153323Srodrigc			xfs_buf_relse(bp);
900153323Srodrigc		} else {
901153323Srodrigc			cmn_err(CE_WARN, "XFS: size check 3 failed");
902153323Srodrigc			if (error == ENOSPC) {
903153323Srodrigc				error = XFS_ERROR(E2BIG);
904153323Srodrigc			}
905153323Srodrigc			goto error1;
906153323Srodrigc		}
907153323Srodrigc	}
908153323Srodrigc
909153323Srodrigc	/*
910153323Srodrigc	 * Initialize realtime fields in the mount structure
911153323Srodrigc	 */
912153323Srodrigc	if ((error = xfs_rtmount_init(mp))) {
913153323Srodrigc		cmn_err(CE_WARN, "XFS: RT mount failed");
914153323Srodrigc		goto error1;
915153323Srodrigc	}
916153323Srodrigc
917153323Srodrigc	/*
918153323Srodrigc	 * For client case we are done now
919153323Srodrigc	 */
920153323Srodrigc	if (mfsi_flags & XFS_MFSI_CLIENT) {
921159451Srodrigc		return 0;
922153323Srodrigc	}
923153323Srodrigc
924153323Srodrigc	/*
925153323Srodrigc	 *  Copies the low order bits of the timestamp and the randomly
926153323Srodrigc	 *  set "sequence" number out of a UUID.
927153323Srodrigc	 */
928153323Srodrigc	uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);
929153323Srodrigc
930153323Srodrigc	/*
931153323Srodrigc	 *  The vfs structure needs to have a file system independent
932153323Srodrigc	 *  way of checking for the invariant file system ID.  Since it
933153323Srodrigc	 *  can't look at mount structures it has a pointer to the data
934153323Srodrigc	 *  in the mount structure.
935153323Srodrigc	 *
936153323Srodrigc	 *  File systems that don't support user level file handles (i.e.
937153323Srodrigc	 *  all of them except for XFS) will leave vfs_altfsid as NULL.
938153323Srodrigc	 */
939153323Srodrigc	vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid;
940153323Srodrigc	mp->m_dmevmask = 0;	/* not persistent; set after each mount */
941153323Srodrigc
942153323Srodrigc	/*
943153323Srodrigc	 * Select the right directory manager.
944153323Srodrigc	 */
945153323Srodrigc	mp->m_dirops =
946153323Srodrigc		XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ?
947153323Srodrigc			xfsv2_dirops :
948153323Srodrigc			xfsv1_dirops;
949153323Srodrigc
950153323Srodrigc	/*
951153323Srodrigc	 * Initialize directory manager's entries.
952153323Srodrigc	 */
953153323Srodrigc	XFS_DIR_MOUNT(mp);
954153323Srodrigc
955153323Srodrigc	/*
956153323Srodrigc	 * Initialize the attribute manager's entries.
957153323Srodrigc	 */
958153323Srodrigc	mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100;
959153323Srodrigc
960153323Srodrigc	/*
961153323Srodrigc	 * Initialize the precomputed transaction reservations values.
962153323Srodrigc	 */
963153323Srodrigc	xfs_trans_init(mp);
964153323Srodrigc
965153323Srodrigc	/*
966153323Srodrigc	 * Allocate and initialize the inode hash table for this
967153323Srodrigc	 * file system.
968153323Srodrigc	 */
969153323Srodrigc	xfs_ihash_init(mp);
970153323Srodrigc	xfs_chash_init(mp);
971153323Srodrigc
972153323Srodrigc	/*
973153323Srodrigc	 * Allocate and initialize the per-ag data.
974153323Srodrigc	 */
975153323Srodrigc	init_rwsem(&mp->m_peraglock);
976153323Srodrigc	mp->m_perag =
977153323Srodrigc		kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP);
978153323Srodrigc
979159451Srodrigc	mp->m_maxagi = xfs_initialize_perag(vfsp, mp, sbp->sb_agcount);
980153323Srodrigc
981153323Srodrigc	/*
982153323Srodrigc	 * log's mount-time initialization. Perform 1st part recovery if needed
983153323Srodrigc	 */
984153323Srodrigc	if (likely(sbp->sb_logblocks > 0)) {	/* check for volume case */
985153323Srodrigc		error = xfs_log_mount(mp, mp->m_logdev_targp,
986153323Srodrigc				      XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
987153323Srodrigc				      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
988153323Srodrigc		if (error) {
989153323Srodrigc			cmn_err(CE_WARN, "XFS: log mount failed");
990153323Srodrigc			goto error2;
991153323Srodrigc		}
992153323Srodrigc	} else {	/* No log has been defined */
993153323Srodrigc		cmn_err(CE_WARN, "XFS: no log defined");
994153323Srodrigc		XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp);
995153323Srodrigc		error = XFS_ERROR(EFSCORRUPTED);
996153323Srodrigc		goto error2;
997153323Srodrigc	}
998153323Srodrigc
999153323Srodrigc	/*
1000153323Srodrigc	 * Get and sanity-check the root inode.
1001153323Srodrigc	 * Save the pointer to it in the mount structure.
1002153323Srodrigc	 */
1003159451Srodrigc	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
1004153323Srodrigc	if (error) {
1005153323Srodrigc		cmn_err(CE_WARN, "XFS: failed to read root inode");
1006153323Srodrigc		goto error3;
1007153323Srodrigc	}
1008153323Srodrigc
1009153323Srodrigc	ASSERT(rip != NULL);
1010153323Srodrigc	rvp = XFS_ITOV(rip);
1011153323Srodrigc
1012153323Srodrigc	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
1013153323Srodrigc		cmn_err(CE_WARN, "XFS: corrupted root inode");
1014159451Srodrigc		printf("Root inode %p is not a directory: %llu",
1015159451Srodrigc		       mp->m_ddev_targp, (unsigned long long)rip->i_ino);
1016153323Srodrigc		xfs_iunlock(rip, XFS_ILOCK_EXCL);
1017153323Srodrigc		XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
1018153323Srodrigc				 mp);
1019153323Srodrigc		error = XFS_ERROR(EFSCORRUPTED);
1020153323Srodrigc		goto error4;
1021153323Srodrigc	}
1022153323Srodrigc	mp->m_rootip = rip;	/* save it */
1023153323Srodrigc
1024153323Srodrigc	xfs_iunlock(rip, XFS_ILOCK_EXCL);
1025153323Srodrigc
1026153323Srodrigc	/*
1027153323Srodrigc	 * Initialize realtime inode pointers in the mount structure
1028153323Srodrigc	 */
1029153323Srodrigc	if ((error = xfs_rtmount_inodes(mp))) {
1030153323Srodrigc		/*
1031153323Srodrigc		 * Free up the root inode.
1032153323Srodrigc		 */
1033153323Srodrigc		cmn_err(CE_WARN, "XFS: failed to read RT inodes");
1034153323Srodrigc		goto error4;
1035153323Srodrigc	}
1036153323Srodrigc
1037153323Srodrigc	/*
1038153323Srodrigc	 * If fs is not mounted readonly, then update the superblock
1039153323Srodrigc	 * unit and width changes.
1040153323Srodrigc	 */
1041153323Srodrigc	if (update_flags && !(vfsp->vfs_flag & VFS_RDONLY))
1042153323Srodrigc		xfs_mount_log_sbunit(mp, update_flags);
1043153323Srodrigc
1044153323Srodrigc	/*
1045153323Srodrigc	 * Initialise the XFS quota management subsystem for this mount
1046153323Srodrigc	 */
1047153323Srodrigc	if ((error = XFS_QM_INIT(mp, &quotamount, &quotaflags)))
1048153323Srodrigc		goto error4;
1049153323Srodrigc
1050153323Srodrigc	/*
1051153323Srodrigc	 * Finish recovering the file system.  This part needed to be
1052153323Srodrigc	 * delayed until after the root and real-time bitmap inodes
1053153323Srodrigc	 * were consistently read in.
1054153323Srodrigc	 */
1055153323Srodrigc	error = xfs_log_mount_finish(mp, mfsi_flags);
1056153323Srodrigc	if (error) {
1057153323Srodrigc		cmn_err(CE_WARN, "XFS: log mount finish failed");
1058153323Srodrigc		goto error4;
1059153323Srodrigc	}
1060153323Srodrigc
1061153323Srodrigc	/*
1062153323Srodrigc	 * Complete the quota initialisation, post-log-replay component.
1063153323Srodrigc	 */
1064159451Srodrigc	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
1065153323Srodrigc		goto error4;
1066153323Srodrigc
1067153323Srodrigc	return 0;
1068153323Srodrigc
1069153323Srodrigc error4:
1070153323Srodrigc	/*
1071153323Srodrigc	 * Free up the root inode.
1072153323Srodrigc	 */
1073153323Srodrigc	VN_RELE(rvp);
1074153323Srodrigc error3:
1075153323Srodrigc	xfs_log_unmount_dealloc(mp);
1076153323Srodrigc error2:
1077153323Srodrigc	xfs_ihash_free(mp);
1078153323Srodrigc	xfs_chash_free(mp);
1079153323Srodrigc	for (agno = 0; agno < sbp->sb_agcount; agno++)
1080153323Srodrigc		if (mp->m_perag[agno].pagb_list)
1081153323Srodrigc			kmem_free(mp->m_perag[agno].pagb_list,
1082153323Srodrigc			  sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
1083153323Srodrigc	kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
1084153323Srodrigc	mp->m_perag = NULL;
1085153323Srodrigc	/* FALLTHROUGH */
1086153323Srodrigc error1:
1087153323Srodrigc	if (uuid_mounted)
1088153323Srodrigc		xfs_uuid_unmount(mp);
1089153323Srodrigc	xfs_freesb(mp);
1090153323Srodrigc	return error;
1091153323Srodrigc}
1092153323Srodrigc
1093153323Srodrigc/*
1094153323Srodrigc * xfs_unmountfs
1095153323Srodrigc *
1096153323Srodrigc * This flushes out the inodes,dquots and the superblock, unmounts the
1097153323Srodrigc * log and makes sure that incore structures are freed.
1098153323Srodrigc */
1099153323Srodrigcint
1100153323Srodrigcxfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1101153323Srodrigc{
1102153323Srodrigc	struct xfs_vfs	*vfsp = XFS_MTOVFS(mp);
1103153323Srodrigc#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
1104153323Srodrigc	int64_t		fsid;
1105153323Srodrigc#endif
1106153323Srodrigc
1107159451Srodrigc	xfs_iflush_all(mp);
1108153323Srodrigc
1109159451Srodrigc	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
1110153323Srodrigc
1111153323Srodrigc	/*
1112153323Srodrigc	 * Flush out the log synchronously so that we know for sure
1113153323Srodrigc	 * that nothing is pinned.  This is important because bflush()
1114153323Srodrigc	 * will skip pinned buffers.
1115153323Srodrigc	 */
1116153323Srodrigc	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
1117153323Srodrigc
1118153323Srodrigc	xfs_binval(mp->m_ddev_targp);
1119153323Srodrigc	if (mp->m_rtdev_targp) {
1120153323Srodrigc		xfs_binval(mp->m_rtdev_targp);
1121153323Srodrigc	}
1122153323Srodrigc
1123153323Srodrigc	xfs_unmountfs_writesb(mp);
1124153323Srodrigc
1125159451Srodrigc	xfs_unmountfs_wait(mp); 		/* wait for async bufs */
1126159451Srodrigc
1127153323Srodrigc	xfs_log_unmount(mp);			/* Done! No more fs ops. */
1128153323Srodrigc
1129153323Srodrigc	xfs_freesb(mp);
1130153323Srodrigc
1131153323Srodrigc	/*
1132153323Srodrigc	 * All inodes from this mount point should be freed.
1133153323Srodrigc	 */
1134159451Srodrigc	//ASSERT(mp->m_inodes == NULL);
1135159451Srodrigc	if (mp->m_inodes != NULL ) {
1136159451Srodrigc		printf("WRONG: mp->m_ireclaims: %d\n", mp->m_ireclaims);
1137159451Srodrigc		printf("WRONG: mp->m_inodes: %p\n", mp->m_inodes);
1138153323Srodrigc	}
1139153323Srodrigc
1140153323Srodrigc	xfs_unmountfs_close(mp, cr);
1141153323Srodrigc	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
1142153323Srodrigc		xfs_uuid_unmount(mp);
1143153323Srodrigc
1144153323Srodrigc#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
1145153323Srodrigc	/*
1146153323Srodrigc	 * clear all error tags on this filesystem
1147153323Srodrigc	 */
1148153323Srodrigc	memcpy(&fsid, &vfsp->vfs_fsid, sizeof(int64_t));
1149153323Srodrigc	xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0);
1150153323Srodrigc#endif
1151153323Srodrigc	XFS_IODONE(vfsp);
1152153323Srodrigc	xfs_mount_free(mp, 1);
1153153323Srodrigc	return 0;
1154153323Srodrigc}
1155153323Srodrigc
1156153323Srodrigcvoid
1157153323Srodrigcxfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
1158153323Srodrigc{
1159159451Srodrigc	if (mp->m_logdev_targp != mp->m_ddev_targp)
1160159451Srodrigc		xfs_free_buftarg(mp->m_logdev_targp, 1);
1161159451Srodrigc	if (mp->m_rtdev_targp)
1162159451Srodrigc		xfs_free_buftarg(mp->m_rtdev_targp, 1);
1163159451Srodrigc	xfs_free_buftarg(mp->m_ddev_targp, 0);
1164159451Srodrigc}
1165153323Srodrigc
1166159451SrodrigcSTATIC void
1167159451Srodrigcxfs_unmountfs_wait(xfs_mount_t *mp)
1168159451Srodrigc{
1169159451Srodrigc	if (mp->m_logdev_targp != mp->m_ddev_targp)
1170159451Srodrigc		xfs_wait_buftarg(mp->m_logdev_targp);
1171159451Srodrigc	if (mp->m_rtdev_targp)
1172159451Srodrigc		xfs_wait_buftarg(mp->m_rtdev_targp);
1173159451Srodrigc	xfs_wait_buftarg(mp->m_ddev_targp);
1174153323Srodrigc}
1175153323Srodrigc
1176153323Srodrigcint
1177153323Srodrigcxfs_unmountfs_writesb(xfs_mount_t *mp)
1178153323Srodrigc{
1179153323Srodrigc	xfs_buf_t	*sbp;
1180153323Srodrigc	xfs_sb_t	*sb;
1181153323Srodrigc	int		error = 0;
1182153323Srodrigc
1183153323Srodrigc	/*
1184153323Srodrigc	 * skip superblock write if fs is read-only, or
1185153323Srodrigc	 * if we are doing a forced umount.
1186153323Srodrigc	 */
1187153323Srodrigc	sbp = xfs_getsb(mp, 0);
1188153323Srodrigc	if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
1189153323Srodrigc		XFS_FORCED_SHUTDOWN(mp))) {
1190159451Srodrigc
1191159451Srodrigc		xfs_icsb_sync_counters(mp);
1192159451Srodrigc
1193153323Srodrigc		/*
1194153323Srodrigc		 * mark shared-readonly if desired
1195153323Srodrigc		 */
1196153323Srodrigc		sb = XFS_BUF_TO_SBP(sbp);
1197153323Srodrigc		if (mp->m_mk_sharedro) {
1198153323Srodrigc			if (!(sb->sb_flags & XFS_SBF_READONLY))
1199153323Srodrigc				sb->sb_flags |= XFS_SBF_READONLY;
1200153323Srodrigc			if (!XFS_SB_VERSION_HASSHARED(sb))
1201153323Srodrigc				XFS_SB_VERSION_ADDSHARED(sb);
1202153323Srodrigc			xfs_fs_cmn_err(CE_NOTE, mp,
1203153323Srodrigc				"Unmounting, marking shared read-only");
1204153323Srodrigc		}
1205159451Srodrigc		XFS_BUF_UNDONE(sbp);
1206153323Srodrigc		XFS_BUF_UNREAD(sbp);
1207153323Srodrigc		XFS_BUF_UNDELAYWRITE(sbp);
1208153323Srodrigc		XFS_BUF_WRITE(sbp);
1209153323Srodrigc		XFS_BUF_UNASYNC(sbp);
1210153323Srodrigc		ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
1211153323Srodrigc		xfsbdstrat(mp, sbp);
1212153323Srodrigc		/* Nevermind errors we might get here. */
1213153323Srodrigc		error = xfs_iowait(sbp);
1214153323Srodrigc		if (error)
1215153323Srodrigc			xfs_ioerror_alert("xfs_unmountfs_writesb",
1216153323Srodrigc					  mp, sbp, XFS_BUF_ADDR(sbp));
1217153323Srodrigc		if (error && mp->m_mk_sharedro)
1218153323Srodrigc			xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting.  Filesystem may not be marked shared readonly");
1219153323Srodrigc	}
1220153323Srodrigc	xfs_buf_relse(sbp);
1221159451Srodrigc	return error;
1222153323Srodrigc}
1223153323Srodrigc
1224153323Srodrigc/*
1225153323Srodrigc * xfs_mod_sb() can be used to copy arbitrary changes to the
1226153323Srodrigc * in-core superblock into the superblock buffer to be logged.
1227153323Srodrigc * It does not provide the higher level of locking that is
1228153323Srodrigc * needed to protect the in-core superblock from concurrent
1229153323Srodrigc * access.
1230153323Srodrigc */
1231153323Srodrigcvoid
1232153323Srodrigcxfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1233153323Srodrigc{
1234153323Srodrigc	xfs_buf_t	*bp;
1235153323Srodrigc	int		first;
1236153323Srodrigc	int		last;
1237153323Srodrigc	xfs_mount_t	*mp;
1238153323Srodrigc	xfs_sb_t	*sbp;
1239153323Srodrigc	xfs_sb_field_t	f;
1240153323Srodrigc
1241153323Srodrigc	ASSERT(fields);
1242153323Srodrigc	if (!fields)
1243153323Srodrigc		return;
1244153323Srodrigc	mp = tp->t_mountp;
1245153323Srodrigc	bp = xfs_trans_getsb(tp, mp, 0);
1246153323Srodrigc	sbp = XFS_BUF_TO_SBP(bp);
1247153323Srodrigc	first = sizeof(xfs_sb_t);
1248153323Srodrigc	last = 0;
1249153323Srodrigc
1250153323Srodrigc	/* translate/copy */
1251153323Srodrigc
1252159451Srodrigc	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), -1, fields);
1253153323Srodrigc
1254153323Srodrigc	/* find modified range */
1255153323Srodrigc
1256153323Srodrigc	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
1257153323Srodrigc	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1258153323Srodrigc	first = xfs_sb_info[f].offset;
1259153323Srodrigc
1260153323Srodrigc	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
1261153323Srodrigc	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1262153323Srodrigc	last = xfs_sb_info[f + 1].offset - 1;
1263153323Srodrigc
1264153323Srodrigc	xfs_trans_log_buf(tp, bp, first, last);
1265153323Srodrigc}
1266153323Srodrigc/*
1267153323Srodrigc * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
1268153323Srodrigc * a delta to a specified field in the in-core superblock.  Simply
1269153323Srodrigc * switch on the field indicated and apply the delta to that field.
1270153323Srodrigc * Fields are not allowed to dip below zero, so if the delta would
1271153323Srodrigc * do this do not apply it and return EINVAL.
1272153323Srodrigc *
1273153323Srodrigc * The SB_LOCK must be held when this routine is called.
1274153323Srodrigc */
1275159451Srodrigcint
1276153323Srodrigcxfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1277153323Srodrigc			int delta, int rsvd)
1278153323Srodrigc{
1279153323Srodrigc	int		scounter;	/* short counter for 32 bit fields */
1280153323Srodrigc	long long	lcounter;	/* long counter for 64 bit fields */
1281153323Srodrigc	long long	res_used, rem;
1282153323Srodrigc
1283153323Srodrigc	/*
1284153323Srodrigc	 * With the in-core superblock spin lock held, switch
1285153323Srodrigc	 * on the indicated field.  Apply the delta to the
1286153323Srodrigc	 * proper field.  If the fields value would dip below
1287153323Srodrigc	 * 0, then do not apply the delta and return EINVAL.
1288153323Srodrigc	 */
1289153323Srodrigc	switch (field) {
1290153323Srodrigc	case XFS_SBS_ICOUNT:
1291153323Srodrigc		lcounter = (long long)mp->m_sb.sb_icount;
1292153323Srodrigc		lcounter += delta;
1293153323Srodrigc		if (lcounter < 0) {
1294153323Srodrigc			ASSERT(0);
1295159451Srodrigc			return XFS_ERROR(EINVAL);
1296153323Srodrigc		}
1297153323Srodrigc		mp->m_sb.sb_icount = lcounter;
1298159451Srodrigc		return 0;
1299153323Srodrigc	case XFS_SBS_IFREE:
1300153323Srodrigc		lcounter = (long long)mp->m_sb.sb_ifree;
1301153323Srodrigc		lcounter += delta;
1302153323Srodrigc		if (lcounter < 0) {
1303153323Srodrigc			ASSERT(0);
1304159451Srodrigc			return XFS_ERROR(EINVAL);
1305153323Srodrigc		}
1306153323Srodrigc		mp->m_sb.sb_ifree = lcounter;
1307159451Srodrigc		return 0;
1308153323Srodrigc	case XFS_SBS_FDBLOCKS:
1309153323Srodrigc
1310153323Srodrigc		lcounter = (long long)mp->m_sb.sb_fdblocks;
1311153323Srodrigc		res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1312153323Srodrigc
1313153323Srodrigc		if (delta > 0) {		/* Putting blocks back */
1314153323Srodrigc			if (res_used > delta) {
1315153323Srodrigc				mp->m_resblks_avail += delta;
1316153323Srodrigc			} else {
1317153323Srodrigc				rem = delta - res_used;
1318153323Srodrigc				mp->m_resblks_avail = mp->m_resblks;
1319153323Srodrigc				lcounter += rem;
1320153323Srodrigc			}
1321153323Srodrigc		} else {				/* Taking blocks away */
1322153323Srodrigc
1323153323Srodrigc			lcounter += delta;
1324153323Srodrigc
1325153323Srodrigc		/*
1326153323Srodrigc		 * If were out of blocks, use any available reserved blocks if
1327153323Srodrigc		 * were allowed to.
1328153323Srodrigc		 */
1329153323Srodrigc
1330153323Srodrigc			if (lcounter < 0) {
1331153323Srodrigc				if (rsvd) {
1332153323Srodrigc					lcounter = (long long)mp->m_resblks_avail + delta;
1333153323Srodrigc					if (lcounter < 0) {
1334159451Srodrigc						return XFS_ERROR(ENOSPC);
1335153323Srodrigc					}
1336153323Srodrigc					mp->m_resblks_avail = lcounter;
1337159451Srodrigc					return 0;
1338153323Srodrigc				} else {	/* not reserved */
1339159451Srodrigc					return XFS_ERROR(ENOSPC);
1340153323Srodrigc				}
1341153323Srodrigc			}
1342153323Srodrigc		}
1343153323Srodrigc
1344153323Srodrigc		mp->m_sb.sb_fdblocks = lcounter;
1345159451Srodrigc		return 0;
1346153323Srodrigc	case XFS_SBS_FREXTENTS:
1347153323Srodrigc		lcounter = (long long)mp->m_sb.sb_frextents;
1348153323Srodrigc		lcounter += delta;
1349153323Srodrigc		if (lcounter < 0) {
1350159451Srodrigc			return XFS_ERROR(ENOSPC);
1351153323Srodrigc		}
1352153323Srodrigc		mp->m_sb.sb_frextents = lcounter;
1353159451Srodrigc		return 0;
1354153323Srodrigc	case XFS_SBS_DBLOCKS:
1355153323Srodrigc		lcounter = (long long)mp->m_sb.sb_dblocks;
1356153323Srodrigc		lcounter += delta;
1357153323Srodrigc		if (lcounter < 0) {
1358153323Srodrigc			ASSERT(0);
1359159451Srodrigc			return XFS_ERROR(EINVAL);
1360153323Srodrigc		}
1361153323Srodrigc		mp->m_sb.sb_dblocks = lcounter;
1362159451Srodrigc		return 0;
1363153323Srodrigc	case XFS_SBS_AGCOUNT:
1364153323Srodrigc		scounter = mp->m_sb.sb_agcount;
1365153323Srodrigc		scounter += delta;
1366153323Srodrigc		if (scounter < 0) {
1367153323Srodrigc			ASSERT(0);
1368159451Srodrigc			return XFS_ERROR(EINVAL);
1369153323Srodrigc		}
1370153323Srodrigc		mp->m_sb.sb_agcount = scounter;
1371159451Srodrigc		return 0;
1372153323Srodrigc	case XFS_SBS_IMAX_PCT:
1373153323Srodrigc		scounter = mp->m_sb.sb_imax_pct;
1374153323Srodrigc		scounter += delta;
1375153323Srodrigc		if (scounter < 0) {
1376153323Srodrigc			ASSERT(0);
1377159451Srodrigc			return XFS_ERROR(EINVAL);
1378153323Srodrigc		}
1379153323Srodrigc		mp->m_sb.sb_imax_pct = scounter;
1380159451Srodrigc		return 0;
1381153323Srodrigc	case XFS_SBS_REXTSIZE:
1382153323Srodrigc		scounter = mp->m_sb.sb_rextsize;
1383153323Srodrigc		scounter += delta;
1384153323Srodrigc		if (scounter < 0) {
1385153323Srodrigc			ASSERT(0);
1386159451Srodrigc			return XFS_ERROR(EINVAL);
1387153323Srodrigc		}
1388153323Srodrigc		mp->m_sb.sb_rextsize = scounter;
1389159451Srodrigc		return 0;
1390153323Srodrigc	case XFS_SBS_RBMBLOCKS:
1391153323Srodrigc		scounter = mp->m_sb.sb_rbmblocks;
1392153323Srodrigc		scounter += delta;
1393153323Srodrigc		if (scounter < 0) {
1394153323Srodrigc			ASSERT(0);
1395159451Srodrigc			return XFS_ERROR(EINVAL);
1396153323Srodrigc		}
1397153323Srodrigc		mp->m_sb.sb_rbmblocks = scounter;
1398159451Srodrigc		return 0;
1399153323Srodrigc	case XFS_SBS_RBLOCKS:
1400153323Srodrigc		lcounter = (long long)mp->m_sb.sb_rblocks;
1401153323Srodrigc		lcounter += delta;
1402153323Srodrigc		if (lcounter < 0) {
1403153323Srodrigc			ASSERT(0);
1404159451Srodrigc			return XFS_ERROR(EINVAL);
1405153323Srodrigc		}
1406153323Srodrigc		mp->m_sb.sb_rblocks = lcounter;
1407159451Srodrigc		return 0;
1408153323Srodrigc	case XFS_SBS_REXTENTS:
1409153323Srodrigc		lcounter = (long long)mp->m_sb.sb_rextents;
1410153323Srodrigc		lcounter += delta;
1411153323Srodrigc		if (lcounter < 0) {
1412153323Srodrigc			ASSERT(0);
1413159451Srodrigc			return XFS_ERROR(EINVAL);
1414153323Srodrigc		}
1415153323Srodrigc		mp->m_sb.sb_rextents = lcounter;
1416159451Srodrigc		return 0;
1417153323Srodrigc	case XFS_SBS_REXTSLOG:
1418153323Srodrigc		scounter = mp->m_sb.sb_rextslog;
1419153323Srodrigc		scounter += delta;
1420153323Srodrigc		if (scounter < 0) {
1421153323Srodrigc			ASSERT(0);
1422159451Srodrigc			return XFS_ERROR(EINVAL);
1423153323Srodrigc		}
1424153323Srodrigc		mp->m_sb.sb_rextslog = scounter;
1425159451Srodrigc		return 0;
1426153323Srodrigc	default:
1427153323Srodrigc		ASSERT(0);
1428159451Srodrigc		return XFS_ERROR(EINVAL);
1429153323Srodrigc	}
1430153323Srodrigc}
1431153323Srodrigc
1432153323Srodrigc/*
1433153323Srodrigc * xfs_mod_incore_sb() is used to change a field in the in-core
1434153323Srodrigc * superblock structure by the specified delta.  This modification
1435153323Srodrigc * is protected by the SB_LOCK.  Just use the xfs_mod_incore_sb_unlocked()
1436153323Srodrigc * routine to do the work.
1437153323Srodrigc */
1438153323Srodrigcint
1439153323Srodrigcxfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
1440153323Srodrigc{
1441153323Srodrigc	unsigned long	s;
1442153323Srodrigc	int	status;
1443153323Srodrigc
1444159451Srodrigc	/* check for per-cpu counters */
1445159451Srodrigc	switch (field) {
1446159451Srodrigc#ifdef HAVE_PERCPU_SB
1447159451Srodrigc	case XFS_SBS_ICOUNT:
1448159451Srodrigc	case XFS_SBS_IFREE:
1449159451Srodrigc	case XFS_SBS_FDBLOCKS:
1450159451Srodrigc		if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
1451159451Srodrigc			status = xfs_icsb_modify_counters(mp, field,
1452159451Srodrigc							delta, rsvd);
1453159451Srodrigc			break;
1454159451Srodrigc		}
1455159451Srodrigc		/* FALLTHROUGH */
1456159451Srodrigc#endif
1457159451Srodrigc	default:
1458159451Srodrigc		s = XFS_SB_LOCK(mp);
1459159451Srodrigc		status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
1460159451Srodrigc		XFS_SB_UNLOCK(mp, s);
1461159451Srodrigc		break;
1462159451Srodrigc	}
1463159451Srodrigc
1464159451Srodrigc	return status;
1465153323Srodrigc}
1466153323Srodrigc
1467153323Srodrigc/*
1468153323Srodrigc * xfs_mod_incore_sb_batch() is used to change more than one field
1469153323Srodrigc * in the in-core superblock structure at a time.  This modification
1470153323Srodrigc * is protected by a lock internal to this module.  The fields and
1471153323Srodrigc * changes to those fields are specified in the array of xfs_mod_sb
1472153323Srodrigc * structures passed in.
1473153323Srodrigc *
1474153323Srodrigc * Either all of the specified deltas will be applied or none of
1475153323Srodrigc * them will.  If any modified field dips below 0, then all modifications
1476153323Srodrigc * will be backed out and EINVAL will be returned.
1477153323Srodrigc */
1478153323Srodrigcint
1479153323Srodrigcxfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
1480153323Srodrigc{
1481153323Srodrigc	unsigned long	s;
1482153323Srodrigc	int		status=0;
1483153323Srodrigc	xfs_mod_sb_t	*msbp;
1484153323Srodrigc
1485153323Srodrigc	/*
1486153323Srodrigc	 * Loop through the array of mod structures and apply each
1487153323Srodrigc	 * individually.  If any fail, then back out all those
1488153323Srodrigc	 * which have already been applied.  Do all of this within
1489153323Srodrigc	 * the scope of the SB_LOCK so that all of the changes will
1490153323Srodrigc	 * be atomic.
1491153323Srodrigc	 */
1492153323Srodrigc	s = XFS_SB_LOCK(mp);
1493153323Srodrigc	msbp = &msb[0];
1494153323Srodrigc	for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
1495153323Srodrigc		/*
1496153323Srodrigc		 * Apply the delta at index n.  If it fails, break
1497153323Srodrigc		 * from the loop so we'll fall into the undo loop
1498153323Srodrigc		 * below.
1499153323Srodrigc		 */
1500159451Srodrigc		switch (msbp->msb_field) {
1501159451Srodrigc#ifdef HAVE_PERCPU_SB
1502159451Srodrigc		case XFS_SBS_ICOUNT:
1503159451Srodrigc		case XFS_SBS_IFREE:
1504159451Srodrigc		case XFS_SBS_FDBLOCKS:
1505159451Srodrigc			if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
1506159451Srodrigc				status = xfs_icsb_modify_counters_locked(mp,
1507159451Srodrigc							msbp->msb_field,
1508159451Srodrigc							msbp->msb_delta, rsvd);
1509159451Srodrigc				break;
1510159451Srodrigc			}
1511159451Srodrigc			/* FALLTHROUGH */
1512159451Srodrigc#endif
1513159451Srodrigc		default:
1514159451Srodrigc			status = xfs_mod_incore_sb_unlocked(mp,
1515159451Srodrigc						msbp->msb_field,
1516159451Srodrigc						msbp->msb_delta, rsvd);
1517159451Srodrigc			break;
1518159451Srodrigc		}
1519159451Srodrigc
1520153323Srodrigc		if (status != 0) {
1521153323Srodrigc			break;
1522153323Srodrigc		}
1523153323Srodrigc	}
1524153323Srodrigc
1525153323Srodrigc	/*
1526153323Srodrigc	 * If we didn't complete the loop above, then back out
1527153323Srodrigc	 * any changes made to the superblock.  If you add code
1528153323Srodrigc	 * between the loop above and here, make sure that you
1529153323Srodrigc	 * preserve the value of status. Loop back until
1530153323Srodrigc	 * we step below the beginning of the array.  Make sure
1531153323Srodrigc	 * we don't touch anything back there.
1532153323Srodrigc	 */
1533153323Srodrigc	if (status != 0) {
1534153323Srodrigc		msbp--;
1535153323Srodrigc		while (msbp >= msb) {
1536159451Srodrigc			switch (msbp->msb_field) {
1537159451Srodrigc#ifdef HAVE_PERCPU_SB
1538159451Srodrigc			case XFS_SBS_ICOUNT:
1539159451Srodrigc			case XFS_SBS_IFREE:
1540159451Srodrigc			case XFS_SBS_FDBLOCKS:
1541159451Srodrigc				if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
1542159451Srodrigc					status =
1543159451Srodrigc					    xfs_icsb_modify_counters_locked(mp,
1544159451Srodrigc							msbp->msb_field,
1545159451Srodrigc							-(msbp->msb_delta),
1546159451Srodrigc							rsvd);
1547159451Srodrigc					break;
1548159451Srodrigc				}
1549159451Srodrigc				/* FALLTHROUGH */
1550159451Srodrigc#endif
1551159451Srodrigc			default:
1552159451Srodrigc				status = xfs_mod_incore_sb_unlocked(mp,
1553159451Srodrigc							msbp->msb_field,
1554159451Srodrigc							-(msbp->msb_delta),
1555159451Srodrigc							rsvd);
1556159451Srodrigc				break;
1557159451Srodrigc			}
1558153323Srodrigc			ASSERT(status == 0);
1559153323Srodrigc			msbp--;
1560153323Srodrigc		}
1561153323Srodrigc	}
1562153323Srodrigc	XFS_SB_UNLOCK(mp, s);
1563159451Srodrigc	return status;
1564153323Srodrigc}
1565153323Srodrigc
1566153323Srodrigc/*
1567153323Srodrigc * xfs_getsb() is called to obtain the buffer for the superblock.
1568153323Srodrigc * The buffer is returned locked and read in from disk.
1569153323Srodrigc * The buffer should be released with a call to xfs_brelse().
1570153323Srodrigc *
1571153323Srodrigc * If the flags parameter is BUF_TRYLOCK, then we'll only return
1572153323Srodrigc * the superblock buffer if it can be locked without sleeping.
1573153323Srodrigc * If it can't then we'll return NULL.
1574153323Srodrigc */
1575153323Srodrigcxfs_buf_t *
1576153323Srodrigcxfs_getsb(
1577153323Srodrigc	xfs_mount_t	*mp,
1578153323Srodrigc	int		flags)
1579153323Srodrigc{
1580153323Srodrigc	xfs_buf_t	*bp;
1581159451Srodrigc	int		extra_flags = 0;
1582159451Srodrigc	unsigned int	sector_size;
1583153323Srodrigc
1584159451Srodrigc
1585153323Srodrigc	bp = mp->m_sb_bp;
1586159451Srodrigc	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
1587159451Srodrigc#ifdef NOT
1588159451Srodrigc	/* MANAGED buf's appear broken in FreeBSD
1589159451Srodrigc	 * but it's unclear if we need a persistant superblock?
1590159451Srodrigc	 * since we now translate the ondisk superblock to
1591159451Srodrigc	 * a separate translated structure and then translate that
1592159451Srodrigc	 * structure back when we want to write the superblock
1593159451Srodrigc	 */
1594159451Srodrigc	extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED;
1595159451Srodrigc	extra_flags = XFS_BUF_MANAGE;
1596159451Srodrigc#endif
1597159451Srodrigc
1598159451Srodrigc	mp->m_sb_bp = bp
1599159451Srodrigc	  = xfs_buf_read_flags(mp->m_ddev_targp,
1600159451Srodrigc			       XFS_SB_DADDR,
1601159451Srodrigc			       BTOBB(sector_size),
1602159451Srodrigc			       extra_flags);
1603159451Srodrigc
1604153323Srodrigc	XFS_BUF_HOLD(bp);
1605153323Srodrigc	ASSERT(XFS_BUF_ISDONE(bp));
1606159451Srodrigc	if (!XFS_BUF_ISDONE(bp)){
1607159451Srodrigc		printf("xfs_getsb: %p bp flags 0x%x\n",bp,bp->b_flags);
1608159451Srodrigc	}
1609159451Srodrigc	return bp;
1610153323Srodrigc}
1611153323Srodrigc
1612153323Srodrigc/*
1613153323Srodrigc * Used to free the superblock along various error paths.
1614153323Srodrigc */
1615153323Srodrigcvoid
1616153323Srodrigcxfs_freesb(
1617153323Srodrigc	xfs_mount_t	*mp)
1618153323Srodrigc{
1619153323Srodrigc	xfs_buf_t	*bp;
1620153323Srodrigc
1621153323Srodrigc	/*
1622153323Srodrigc	 * Use xfs_getsb() so that the buffer will be locked
1623153323Srodrigc	 * when we call xfs_buf_relse().
1624153323Srodrigc	 */
1625153323Srodrigc	bp = xfs_getsb(mp, 0);
1626153323Srodrigc	XFS_BUF_UNMANAGE(bp);
1627153323Srodrigc	xfs_buf_relse(bp);
1628153323Srodrigc	mp->m_sb_bp = NULL;
1629153323Srodrigc}
1630153323Srodrigc
1631153323Srodrigc/*
1632153323Srodrigc * See if the UUID is unique among mounted XFS filesystems.
1633153323Srodrigc * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
1634153323Srodrigc */
1635153323SrodrigcSTATIC int
1636153323Srodrigcxfs_uuid_mount(
1637153323Srodrigc	xfs_mount_t	*mp)
1638153323Srodrigc{
1639153323Srodrigc	if (uuid_is_nil(&mp->m_sb.sb_uuid)) {
1640153323Srodrigc		cmn_err(CE_WARN,
1641153323Srodrigc			"XFS: Filesystem %s has nil UUID - can't mount",
1642153323Srodrigc			mp->m_fsname);
1643153323Srodrigc		return -1;
1644153323Srodrigc	}
1645153323Srodrigc	if (!uuid_table_insert(&mp->m_sb.sb_uuid)) {
1646153323Srodrigc		cmn_err(CE_WARN,
1647153323Srodrigc			"XFS: Filesystem %s has duplicate UUID - can't mount",
1648153323Srodrigc			mp->m_fsname);
1649153323Srodrigc		return -1;
1650153323Srodrigc	}
1651153323Srodrigc	return 0;
1652153323Srodrigc}
1653153323Srodrigc
1654153323Srodrigc/*
1655153323Srodrigc * Remove filesystem from the UUID table.
1656153323Srodrigc */
1657153323SrodrigcSTATIC void
1658153323Srodrigcxfs_uuid_unmount(
1659153323Srodrigc	xfs_mount_t	*mp)
1660153323Srodrigc{
1661153323Srodrigc	uuid_table_remove(&mp->m_sb.sb_uuid);
1662153323Srodrigc}
1663153323Srodrigc
1664153323Srodrigc/*
1665153323Srodrigc * Used to log changes to the superblock unit and width fields which could
1666153323Srodrigc * be altered by the mount options. Only the first superblock is updated.
1667153323Srodrigc */
1668153323SrodrigcSTATIC void
1669153323Srodrigcxfs_mount_log_sbunit(
1670153323Srodrigc	xfs_mount_t	*mp,
1671153323Srodrigc	__int64_t	fields)
1672153323Srodrigc{
1673153323Srodrigc	xfs_trans_t	*tp;
1674153323Srodrigc
1675153323Srodrigc	ASSERT(fields & (XFS_SB_UNIT|XFS_SB_WIDTH|XFS_SB_UUID));
1676153323Srodrigc
1677153323Srodrigc	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
1678153323Srodrigc	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
1679153323Srodrigc				XFS_DEFAULT_LOG_COUNT)) {
1680153323Srodrigc		xfs_trans_cancel(tp, 0);
1681153323Srodrigc		return;
1682153323Srodrigc	}
1683153323Srodrigc	xfs_mod_sb(tp, fields);
1684153323Srodrigc	xfs_trans_commit(tp, 0, NULL);
1685153323Srodrigc}
1686153323Srodrigc
1687159451Srodrigc
1688159451Srodrigc#ifdef HAVE_PERCPU_SB
1689159451Srodrigc/*
1690159451Srodrigc * Per-cpu incore superblock counters
1691159451Srodrigc *
1692159451Srodrigc * Simple concept, difficult implementation
1693159451Srodrigc *
1694159451Srodrigc * Basically, replace the incore superblock counters with a distributed per cpu
1695159451Srodrigc * counter for contended fields (e.g.  free block count).
1696159451Srodrigc *
1697159451Srodrigc * Difficulties arise in that the incore sb is used for ENOSPC checking, and
1698159451Srodrigc * hence needs to be accurately read when we are running low on space. Hence
1699159451Srodrigc * there is a method to enable and disable the per-cpu counters based on how
1700159451Srodrigc * much "stuff" is available in them.
1701159451Srodrigc *
1702159451Srodrigc * Basically, a counter is enabled if there is enough free resource to justify
1703159451Srodrigc * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
1704159451Srodrigc * ENOSPC), then we disable the counters to synchronise all callers and
1705159451Srodrigc * re-distribute the available resources.
1706159451Srodrigc *
1707159451Srodrigc * If, once we redistributed the available resources, we still get a failure,
1708159451Srodrigc * we disable the per-cpu counter and go through the slow path.
1709159451Srodrigc *
1710159451Srodrigc * The slow path is the current xfs_mod_incore_sb() function.  This means that
 * when we disable a per-cpu counter, we need to drain its resources back to
1712159451Srodrigc * the global superblock. We do this after disabling the counter to prevent
1713159451Srodrigc * more threads from queueing up on the counter.
1714159451Srodrigc *
1715159451Srodrigc * Essentially, this means that we still need a lock in the fast path to enable
1716159451Srodrigc * synchronisation between the global counters and the per-cpu counters. This
1717159451Srodrigc * is not a problem because the lock will be local to a CPU almost all the time
1718159451Srodrigc * and have little contention except when we get to ENOSPC conditions.
1719159451Srodrigc *
1720159451Srodrigc * Basically, this lock becomes a barrier that enables us to lock out the fast
1721159451Srodrigc * path while we do things like enabling and disabling counters and
1722159451Srodrigc * synchronising the counters.
1723159451Srodrigc *
1724159451Srodrigc * Locking rules:
1725159451Srodrigc *
1726159451Srodrigc * 	1. XFS_SB_LOCK() before picking up per-cpu locks
1727159451Srodrigc * 	2. per-cpu locks always picked up via for_each_online_cpu() order
1728159451Srodrigc * 	3. accurate counter sync requires XFS_SB_LOCK + per cpu locks
1729159451Srodrigc * 	4. modifying per-cpu counters requires holding per-cpu lock
1730159451Srodrigc * 	5. modifying global counters requires holding XFS_SB_LOCK
1731159451Srodrigc *	6. enabling or disabling a counter requires holding the XFS_SB_LOCK
1732159451Srodrigc *	   and _none_ of the per-cpu locks.
1733159451Srodrigc *
1734159451Srodrigc * Disabled counters are only ever re-enabled by a balance operation
1735159451Srodrigc * that results in more free resources per CPU than a given threshold.
1736159451Srodrigc * To ensure counters don't remain disabled, they are rebalanced when
1737159451Srodrigc * the global resource goes above a higher threshold (i.e. some hysteresis
1738159451Srodrigc * is present to prevent thrashing).
1739153323Srodrigc */
1740153323Srodrigc
1741159451Srodrigc/*
1742159451Srodrigc * hot-plug CPU notifier support.
1743159451Srodrigc *
1744159451Srodrigc * We cannot use the hotcpu_register() function because it does
1745159451Srodrigc * not allow notifier instances. We need a notifier per filesystem
1746159451Srodrigc * as we need to be able to identify the filesystem to balance
1747159451Srodrigc * the counters out. This is achieved by having a notifier block
1748159451Srodrigc * embedded in the xfs_mount_t and doing pointer magic to get the
1749159451Srodrigc * mount pointer from the notifier block address.
1750159451Srodrigc */
1751159451SrodrigcSTATIC int
1752159451Srodrigcxfs_icsb_cpu_notify(
1753159451Srodrigc	struct notifier_block *nfb,
1754159451Srodrigc	unsigned long action,
1755159451Srodrigc	void *hcpu)
1756159451Srodrigc{
1757159451Srodrigc	xfs_icsb_cnts_t *cntp;
1758159451Srodrigc	xfs_mount_t	*mp;
1759159451Srodrigc	int		s;
1760159451Srodrigc
1761159451Srodrigc	mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
1762159451Srodrigc	cntp = (xfs_icsb_cnts_t *)
1763159451Srodrigc			per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
1764159451Srodrigc	switch (action) {
1765159451Srodrigc	case CPU_UP_PREPARE:
1766159451Srodrigc		/* Easy Case - initialize the area and locks, and
1767159451Srodrigc		 * then rebalance when online does everything else for us. */
1768159451Srodrigc		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1769159451Srodrigc		break;
1770159451Srodrigc	case CPU_ONLINE:
1771159451Srodrigc		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
1772159451Srodrigc		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
1773159451Srodrigc		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
1774159451Srodrigc		break;
1775159451Srodrigc	case CPU_DEAD:
1776159451Srodrigc		/* Disable all the counters, then fold the dead cpu's
1777159451Srodrigc		 * count into the total on the global superblock and
1778159451Srodrigc		 * re-enable the counters. */
1779159451Srodrigc		s = XFS_SB_LOCK(mp);
1780159451Srodrigc		xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
1781159451Srodrigc		xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
1782159451Srodrigc		xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
1783159451Srodrigc
1784159451Srodrigc		mp->m_sb.sb_icount += cntp->icsb_icount;
1785159451Srodrigc		mp->m_sb.sb_ifree += cntp->icsb_ifree;
1786159451Srodrigc		mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
1787159451Srodrigc
1788159451Srodrigc		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1789159451Srodrigc
1790159451Srodrigc		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED);
1791159451Srodrigc		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED);
1792159451Srodrigc		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, XFS_ICSB_SB_LOCKED);
1793159451Srodrigc		XFS_SB_UNLOCK(mp, s);
1794159451Srodrigc		break;
1795159451Srodrigc	}
1796159451Srodrigc
1797159451Srodrigc	return NOTIFY_OK;
1798159451Srodrigc}
1799159451Srodrigc
1800159451Srodrigcint
1801159451Srodrigcxfs_icsb_init_counters(
1802159451Srodrigc	xfs_mount_t	*mp)
1803159451Srodrigc{
1804159451Srodrigc	xfs_icsb_cnts_t *cntp;
1805159451Srodrigc	int		i;
1806159451Srodrigc
1807159451Srodrigc	mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
1808159451Srodrigc	if (mp->m_sb_cnts == NULL)
1809159451Srodrigc		return -ENOMEM;
1810159451Srodrigc
1811159451Srodrigc	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
1812159451Srodrigc	mp->m_icsb_notifier.priority = 0;
1813159451Srodrigc	register_cpu_notifier(&mp->m_icsb_notifier);
1814159451Srodrigc
1815159451Srodrigc	for_each_online_cpu(i) {
1816159451Srodrigc		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1817159451Srodrigc		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1818159451Srodrigc	}
1819159451Srodrigc	/*
1820159451Srodrigc	 * start with all counters disabled so that the
1821159451Srodrigc	 * initial balance kicks us off correctly
1822159451Srodrigc	 */
1823159451Srodrigc	mp->m_icsb_counters = -1;
1824159451Srodrigc	return 0;
1825159451Srodrigc}
1826159451Srodrigc
1827159451SrodrigcSTATIC void
1828159451Srodrigcxfs_icsb_destroy_counters(
1829159451Srodrigc	xfs_mount_t	*mp)
1830159451Srodrigc{
1831159451Srodrigc	if (mp->m_sb_cnts) {
1832159451Srodrigc		unregister_cpu_notifier(&mp->m_icsb_notifier);
1833159451Srodrigc		free_percpu(mp->m_sb_cnts);
1834159451Srodrigc	}
1835159451Srodrigc}
1836159451Srodrigc
1837159451SrodrigcSTATIC inline void
1838159451Srodrigcxfs_icsb_lock_cntr(
1839159451Srodrigc	xfs_icsb_cnts_t	*icsbp)
1840159451Srodrigc{
1841159451Srodrigc	while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
1842159451Srodrigc		ndelay(1000);
1843159451Srodrigc	}
1844159451Srodrigc}
1845159451Srodrigc
1846159451SrodrigcSTATIC inline void
1847159451Srodrigcxfs_icsb_unlock_cntr(
1848159451Srodrigc	xfs_icsb_cnts_t	*icsbp)
1849159451Srodrigc{
1850159451Srodrigc	clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
1851159451Srodrigc}
1852159451Srodrigc
1853159451Srodrigc
1854159451SrodrigcSTATIC inline void
1855159451Srodrigcxfs_icsb_lock_all_counters(
1856159451Srodrigc	xfs_mount_t	*mp)
1857159451Srodrigc{
1858159451Srodrigc	xfs_icsb_cnts_t *cntp;
1859159451Srodrigc	int		i;
1860159451Srodrigc
1861159451Srodrigc	for_each_online_cpu(i) {
1862159451Srodrigc		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1863159451Srodrigc		xfs_icsb_lock_cntr(cntp);
1864159451Srodrigc	}
1865159451Srodrigc}
1866159451Srodrigc
1867159451SrodrigcSTATIC inline void
1868159451Srodrigcxfs_icsb_unlock_all_counters(
1869159451Srodrigc	xfs_mount_t	*mp)
1870159451Srodrigc{
1871159451Srodrigc	xfs_icsb_cnts_t *cntp;
1872159451Srodrigc	int		i;
1873159451Srodrigc
1874159451Srodrigc	for_each_online_cpu(i) {
1875159451Srodrigc		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1876159451Srodrigc		xfs_icsb_unlock_cntr(cntp);
1877159451Srodrigc	}
1878159451Srodrigc}
1879159451Srodrigc
1880159451SrodrigcSTATIC void
1881159451Srodrigcxfs_icsb_count(
1882153323Srodrigc	xfs_mount_t	*mp,
1883159451Srodrigc	xfs_icsb_cnts_t	*cnt,
1884159451Srodrigc	int		flags)
1885153323Srodrigc{
1886159451Srodrigc	xfs_icsb_cnts_t *cntp;
1887159451Srodrigc	int		i;
1888153323Srodrigc
1889159451Srodrigc	memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
1890153323Srodrigc
1891159451Srodrigc	if (!(flags & XFS_ICSB_LAZY_COUNT))
1892159451Srodrigc		xfs_icsb_lock_all_counters(mp);
1893159451Srodrigc
1894159451Srodrigc	for_each_online_cpu(i) {
1895159451Srodrigc		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1896159451Srodrigc		cnt->icsb_icount += cntp->icsb_icount;
1897159451Srodrigc		cnt->icsb_ifree += cntp->icsb_ifree;
1898159451Srodrigc		cnt->icsb_fdblocks += cntp->icsb_fdblocks;
1899153323Srodrigc	}
1900159451Srodrigc
1901159451Srodrigc	if (!(flags & XFS_ICSB_LAZY_COUNT))
1902159451Srodrigc		xfs_icsb_unlock_all_counters(mp);
1903153323Srodrigc}
1904153323Srodrigc
1905159451SrodrigcSTATIC int
1906159451Srodrigcxfs_icsb_counter_disabled(
1907159451Srodrigc	xfs_mount_t	*mp,
1908159451Srodrigc	xfs_sb_field_t	field)
1909159451Srodrigc{
1910159451Srodrigc	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1911159451Srodrigc	return test_bit(field, &mp->m_icsb_counters);
1912159451Srodrigc}
1913159451Srodrigc
1914159451SrodrigcSTATIC int
1915159451Srodrigcxfs_icsb_disable_counter(
1916159451Srodrigc	xfs_mount_t	*mp,
1917159451Srodrigc	xfs_sb_field_t	field)
1918159451Srodrigc{
1919159451Srodrigc	xfs_icsb_cnts_t	cnt;
1920159451Srodrigc
1921159451Srodrigc	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1922159451Srodrigc
1923159451Srodrigc	xfs_icsb_lock_all_counters(mp);
1924159451Srodrigc	if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
1925159451Srodrigc		/* drain back to superblock */
1926159451Srodrigc
1927159451Srodrigc		xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT);
1928159451Srodrigc		switch(field) {
1929159451Srodrigc		case XFS_SBS_ICOUNT:
1930159451Srodrigc			mp->m_sb.sb_icount = cnt.icsb_icount;
1931159451Srodrigc			break;
1932159451Srodrigc		case XFS_SBS_IFREE:
1933159451Srodrigc			mp->m_sb.sb_ifree = cnt.icsb_ifree;
1934159451Srodrigc			break;
1935159451Srodrigc		case XFS_SBS_FDBLOCKS:
1936159451Srodrigc			mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
1937159451Srodrigc			break;
1938159451Srodrigc		default:
1939159451Srodrigc			BUG();
1940159451Srodrigc		}
1941159451Srodrigc	}
1942159451Srodrigc
1943159451Srodrigc	xfs_icsb_unlock_all_counters(mp);
1944159451Srodrigc
1945159451Srodrigc	return 0;
1946159451Srodrigc}
1947159451Srodrigc
1948159451SrodrigcSTATIC void
1949159451Srodrigcxfs_icsb_enable_counter(
1950159451Srodrigc	xfs_mount_t	*mp,
1951159451Srodrigc	xfs_sb_field_t	field,
1952159451Srodrigc	uint64_t	count,
1953159451Srodrigc	uint64_t	resid)
1954159451Srodrigc{
1955159451Srodrigc	xfs_icsb_cnts_t	*cntp;
1956159451Srodrigc	int		i;
1957159451Srodrigc
1958159451Srodrigc	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1959159451Srodrigc
1960159451Srodrigc	xfs_icsb_lock_all_counters(mp);
1961159451Srodrigc	for_each_online_cpu(i) {
1962159451Srodrigc		cntp = per_cpu_ptr(mp->m_sb_cnts, i);
1963159451Srodrigc		switch (field) {
1964159451Srodrigc		case XFS_SBS_ICOUNT:
1965159451Srodrigc			cntp->icsb_icount = count + resid;
1966159451Srodrigc			break;
1967159451Srodrigc		case XFS_SBS_IFREE:
1968159451Srodrigc			cntp->icsb_ifree = count + resid;
1969159451Srodrigc			break;
1970159451Srodrigc		case XFS_SBS_FDBLOCKS:
1971159451Srodrigc			cntp->icsb_fdblocks = count + resid;
1972159451Srodrigc			break;
1973159451Srodrigc		default:
1974159451Srodrigc			BUG();
1975159451Srodrigc			break;
1976159451Srodrigc		}
1977159451Srodrigc		resid = 0;
1978159451Srodrigc	}
1979159451Srodrigc	clear_bit(field, &mp->m_icsb_counters);
1980159451Srodrigc	xfs_icsb_unlock_all_counters(mp);
1981159451Srodrigc}
1982159451Srodrigc
1983159451SrodrigcSTATIC void
1984159451Srodrigcxfs_icsb_sync_counters_int(
1985159451Srodrigc	xfs_mount_t	*mp,
1986159451Srodrigc	int		flags)
1987159451Srodrigc{
1988159451Srodrigc	xfs_icsb_cnts_t	cnt;
1989159451Srodrigc	int		s;
1990159451Srodrigc
1991159451Srodrigc	/* Pass 1: lock all counters */
1992159451Srodrigc	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
1993159451Srodrigc		s = XFS_SB_LOCK(mp);
1994159451Srodrigc
1995159451Srodrigc	xfs_icsb_count(mp, &cnt, flags);
1996159451Srodrigc
1997159451Srodrigc	/* Step 3: update mp->m_sb fields */
1998159451Srodrigc	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
1999159451Srodrigc		mp->m_sb.sb_icount = cnt.icsb_icount;
2000159451Srodrigc	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
2001159451Srodrigc		mp->m_sb.sb_ifree = cnt.icsb_ifree;
2002159451Srodrigc	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
2003159451Srodrigc		mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
2004159451Srodrigc
2005159451Srodrigc	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
2006159451Srodrigc		XFS_SB_UNLOCK(mp, s);
2007159451Srodrigc}
2008159451Srodrigc
2009159451Srodrigc/*
2010159451Srodrigc * Accurate update of per-cpu counters to incore superblock
2011159451Srodrigc */
2012159451SrodrigcSTATIC void
2013159451Srodrigcxfs_icsb_sync_counters(
2014159451Srodrigc	xfs_mount_t	*mp)
2015159451Srodrigc{
2016159451Srodrigc	xfs_icsb_sync_counters_int(mp, 0);
2017159451Srodrigc}
2018159451Srodrigc
2019159451Srodrigc/*
2020159451Srodrigc * lazy addition used for things like df, background sb syncs, etc
2021159451Srodrigc */
2022153323Srodrigcvoid
2023159451Srodrigcxfs_icsb_sync_counters_lazy(
2024153323Srodrigc	xfs_mount_t	*mp)
2025153323Srodrigc{
2026159451Srodrigc	xfs_icsb_sync_counters_int(mp, XFS_ICSB_LAZY_COUNT);
2027159451Srodrigc}
2028153323Srodrigc
2029159451Srodrigc/*
2030159451Srodrigc * Balance and enable/disable counters as necessary.
2031159451Srodrigc *
2032159451Srodrigc * Thresholds for re-enabling counters are somewhat magic.
2033159451Srodrigc * inode counts are chosen to be the same number as single
2034159451Srodrigc * on disk allocation chunk per CPU, and free blocks is
2035159451Srodrigc * something far enough zero that we aren't going thrash
2036159451Srodrigc * when we get near ENOSPC.
2037159451Srodrigc */
2038159451Srodrigc#define XFS_ICSB_INO_CNTR_REENABLE	64
2039159451Srodrigc#define XFS_ICSB_FDBLK_CNTR_REENABLE	512
2040159451SrodrigcSTATIC void
2041159451Srodrigcxfs_icsb_balance_counter(
2042159451Srodrigc	xfs_mount_t	*mp,
2043159451Srodrigc	xfs_sb_field_t  field,
2044159451Srodrigc	int		flags)
2045159451Srodrigc{
2046159451Srodrigc	uint64_t	count, resid = 0;
2047159451Srodrigc	int		weight = num_online_cpus();
2048159451Srodrigc	int		s;
2049159451Srodrigc
2050159451Srodrigc	if (!(flags & XFS_ICSB_SB_LOCKED))
2051159451Srodrigc		s = XFS_SB_LOCK(mp);
2052159451Srodrigc
2053159451Srodrigc	/* disable counter and sync counter */
2054159451Srodrigc	xfs_icsb_disable_counter(mp, field);
2055159451Srodrigc
2056159451Srodrigc	/* update counters  - first CPU gets residual*/
2057159451Srodrigc	switch (field) {
2058159451Srodrigc	case XFS_SBS_ICOUNT:
2059159451Srodrigc		count = mp->m_sb.sb_icount;
2060159451Srodrigc		resid = do_div(count, weight);
2061159451Srodrigc		if (count < XFS_ICSB_INO_CNTR_REENABLE)
2062159451Srodrigc			goto out;
2063159451Srodrigc		break;
2064159451Srodrigc	case XFS_SBS_IFREE:
2065159451Srodrigc		count = mp->m_sb.sb_ifree;
2066159451Srodrigc		resid = do_div(count, weight);
2067159451Srodrigc		if (count < XFS_ICSB_INO_CNTR_REENABLE)
2068159451Srodrigc			goto out;
2069159451Srodrigc		break;
2070159451Srodrigc	case XFS_SBS_FDBLOCKS:
2071159451Srodrigc		count = mp->m_sb.sb_fdblocks;
2072159451Srodrigc		resid = do_div(count, weight);
2073159451Srodrigc		if (count < XFS_ICSB_FDBLK_CNTR_REENABLE)
2074159451Srodrigc			goto out;
2075159451Srodrigc		break;
2076159451Srodrigc	default:
2077159451Srodrigc		BUG();
2078159451Srodrigc		break;
2079153323Srodrigc	}
2080153323Srodrigc
2081159451Srodrigc	xfs_icsb_enable_counter(mp, field, count, resid);
2082159451Srodrigcout:
2083159451Srodrigc	if (!(flags & XFS_ICSB_SB_LOCKED))
2084159451Srodrigc		XFS_SB_UNLOCK(mp, s);
2085153323Srodrigc}
2086153323Srodrigc
2087159451SrodrigcSTATIC int
2088159451Srodrigcxfs_icsb_modify_counters_int(
2089153323Srodrigc	xfs_mount_t	*mp,
2090159451Srodrigc	xfs_sb_field_t	field,
2091159451Srodrigc	int		delta,
2092159451Srodrigc	int		rsvd,
2093159451Srodrigc	int		flags)
2094153323Srodrigc{
2095159451Srodrigc	xfs_icsb_cnts_t	*icsbp;
2096159451Srodrigc	long long	lcounter;	/* long counter for 64 bit fields */
2097159451Srodrigc	int		cpu, s, locked = 0;
2098159451Srodrigc	int		ret = 0, balance_done = 0;
2099153323Srodrigc
2100159451Srodrigcagain:
2101159451Srodrigc	cpu = get_cpu();
2102159451Srodrigc	icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu),
2103159451Srodrigc	xfs_icsb_lock_cntr(icsbp);
2104159451Srodrigc	if (unlikely(xfs_icsb_counter_disabled(mp, field)))
2105159451Srodrigc		goto slow_path;
2106153323Srodrigc
2107159451Srodrigc	switch (field) {
2108159451Srodrigc	case XFS_SBS_ICOUNT:
2109159451Srodrigc		lcounter = icsbp->icsb_icount;
2110159451Srodrigc		lcounter += delta;
2111159451Srodrigc		if (unlikely(lcounter < 0))
2112159451Srodrigc			goto slow_path;
2113159451Srodrigc		icsbp->icsb_icount = lcounter;
2114159451Srodrigc		break;
2115159451Srodrigc
2116159451Srodrigc	case XFS_SBS_IFREE:
2117159451Srodrigc		lcounter = icsbp->icsb_ifree;
2118159451Srodrigc		lcounter += delta;
2119159451Srodrigc		if (unlikely(lcounter < 0))
2120159451Srodrigc			goto slow_path;
2121159451Srodrigc		icsbp->icsb_ifree = lcounter;
2122159451Srodrigc		break;
2123159451Srodrigc
2124159451Srodrigc	case XFS_SBS_FDBLOCKS:
2125159451Srodrigc		BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
2126159451Srodrigc
2127159451Srodrigc		lcounter = icsbp->icsb_fdblocks;
2128159451Srodrigc		lcounter += delta;
2129159451Srodrigc		if (unlikely(lcounter < 0))
2130159451Srodrigc			goto slow_path;
2131159451Srodrigc		icsbp->icsb_fdblocks = lcounter;
2132159451Srodrigc		break;
2133159451Srodrigc	default:
2134159451Srodrigc		BUG();
2135159451Srodrigc		break;
2136153323Srodrigc	}
2137159451Srodrigc	xfs_icsb_unlock_cntr(icsbp);
2138159451Srodrigc	put_cpu();
2139159451Srodrigc	if (locked)
2140159451Srodrigc		XFS_SB_UNLOCK(mp, s);
2141159451Srodrigc	return 0;
2142153323Srodrigc
2143159451Srodrigc	/*
2144159451Srodrigc	 * The slow path needs to be run with the SBLOCK
2145159451Srodrigc	 * held so that we prevent other threads from
2146159451Srodrigc	 * attempting to run this path at the same time.
2147159451Srodrigc	 * this provides exclusion for the balancing code,
2148159451Srodrigc	 * and exclusive fallback if the balance does not
2149159451Srodrigc	 * provide enough resources to continue in an unlocked
2150159451Srodrigc	 * manner.
2151159451Srodrigc	 */
2152159451Srodrigcslow_path:
2153159451Srodrigc	xfs_icsb_unlock_cntr(icsbp);
2154159451Srodrigc	put_cpu();
2155159451Srodrigc
2156159451Srodrigc	/* need to hold superblock incase we need
2157159451Srodrigc	 * to disable a counter */
2158159451Srodrigc	if (!(flags & XFS_ICSB_SB_LOCKED)) {
2159159451Srodrigc		s = XFS_SB_LOCK(mp);
2160159451Srodrigc		locked = 1;
2161159451Srodrigc		flags |= XFS_ICSB_SB_LOCKED;
2162159451Srodrigc	}
2163159451Srodrigc	if (!balance_done) {
2164159451Srodrigc		xfs_icsb_balance_counter(mp, field, flags);
2165159451Srodrigc		balance_done = 1;
2166159451Srodrigc		goto again;
2167159451Srodrigc	} else {
2168159451Srodrigc		/*
2169159451Srodrigc		 * we might not have enough on this local
2170159451Srodrigc		 * cpu to allocate for a bulk request.
2171159451Srodrigc		 * We need to drain this field from all CPUs
2172159451Srodrigc		 * and disable the counter fastpath
2173159451Srodrigc		 */
2174159451Srodrigc		xfs_icsb_disable_counter(mp, field);
2175159451Srodrigc	}
2176159451Srodrigc
2177159451Srodrigc	ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
2178159451Srodrigc
2179159451Srodrigc	if (locked)
2180159451Srodrigc		XFS_SB_UNLOCK(mp, s);
2181159451Srodrigc	return ret;
2182153323Srodrigc}
2183153323Srodrigc
2184159451SrodrigcSTATIC int
2185159451Srodrigcxfs_icsb_modify_counters(
2186159451Srodrigc	xfs_mount_t	*mp,
2187159451Srodrigc	xfs_sb_field_t	field,
2188159451Srodrigc	int		delta,
2189159451Srodrigc	int		rsvd)
2190159451Srodrigc{
2191159451Srodrigc	return xfs_icsb_modify_counters_int(mp, field, delta, rsvd, 0);
2192159451Srodrigc}
2193159451Srodrigc
2194159451Srodrigc/*
2195159451Srodrigc * Called when superblock is already locked
2196159451Srodrigc */
2197159451SrodrigcSTATIC int
2198159451Srodrigcxfs_icsb_modify_counters_locked(
2199159451Srodrigc	xfs_mount_t	*mp,
2200159451Srodrigc	xfs_sb_field_t	field,
2201159451Srodrigc	int		delta,
2202159451Srodrigc	int		rsvd)
2203159451Srodrigc{
2204159451Srodrigc	return xfs_icsb_modify_counters_int(mp, field, delta,
2205159451Srodrigc						rsvd, XFS_ICSB_SB_LOCKED);
2206159451Srodrigc}
2207159451Srodrigc#endif
2208