1153323Srodrigc/*
2159451Srodrigc * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3159451Srodrigc * All Rights Reserved.
4153323Srodrigc *
5159451Srodrigc * This program is free software; you can redistribute it and/or
6159451Srodrigc * modify it under the terms of the GNU General Public License as
7153323Srodrigc * published by the Free Software Foundation.
8153323Srodrigc *
9159451Srodrigc * This program is distributed in the hope that it would be useful,
10159451Srodrigc * but WITHOUT ANY WARRANTY; without even the implied warranty of
11159451Srodrigc * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12159451Srodrigc * GNU General Public License for more details.
13153323Srodrigc *
14159451Srodrigc * You should have received a copy of the GNU General Public License
15159451Srodrigc * along with this program; if not, write the Free Software Foundation,
16159451Srodrigc * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17153323Srodrigc */
18153323Srodrigc#include "xfs.h"
19159451Srodrigc#include "xfs_fs.h"
20153323Srodrigc#include "xfs_types.h"
21159451Srodrigc#include "xfs_bit.h"
22159451Srodrigc#include "xfs_log.h"
23153323Srodrigc#include "xfs_inum.h"
24153323Srodrigc#include "xfs_trans.h"
25153323Srodrigc#include "xfs_sb.h"
26159451Srodrigc#include "xfs_ag.h"
27153323Srodrigc#include "xfs_dir.h"
28153323Srodrigc#include "xfs_dir2.h"
29153323Srodrigc#include "xfs_dmapi.h"
30153323Srodrigc#include "xfs_mount.h"
31159451Srodrigc#include "xfs_da_btree.h"
32153323Srodrigc#include "xfs_bmap_btree.h"
33153323Srodrigc#include "xfs_ialloc_btree.h"
34153323Srodrigc#include "xfs_alloc_btree.h"
35153323Srodrigc#include "xfs_dir_sf.h"
36153323Srodrigc#include "xfs_dir2_sf.h"
37159451Srodrigc#include "xfs_attr_sf.h"
38153323Srodrigc#include "xfs_dinode.h"
39159451Srodrigc#include "xfs_inode.h"
40153323Srodrigc#include "xfs_inode_item.h"
41159451Srodrigc#include "xfs_btree.h"
42159451Srodrigc#include "xfs_alloc.h"
43159451Srodrigc#include "xfs_ialloc.h"
44159451Srodrigc#include "xfs_quota.h"
45153323Srodrigc#include "xfs_error.h"
46153323Srodrigc#include "xfs_bmap.h"
47153323Srodrigc#include "xfs_rw.h"
48153323Srodrigc#include "xfs_refcache.h"
49153323Srodrigc#include "xfs_buf_item.h"
50159451Srodrigc#include "xfs_log_priv.h"
51159451Srodrigc#include "xfs_dir2_trace.h"
52153323Srodrigc#include "xfs_extfree_item.h"
53153323Srodrigc#include "xfs_acl.h"
54153323Srodrigc#include "xfs_attr.h"
55153323Srodrigc#include "xfs_clnt.h"
56159451Srodrigc#include "xfs_fsops.h"
57159451Srodrigc#include "xfs_vnode.h"
58153323Srodrigc
59159451SrodrigcSTATIC int	xfs_sync(bhv_desc_t *, int, cred_t *);
60153323Srodrigc
61153323Srodrigc	extern kmem_zone_t	*xfs_bmap_free_item_zone;
62153323Srodrigc	extern kmem_zone_t	*xfs_btree_cur_zone;
63153323Srodrigc	extern kmem_zone_t	*xfs_trans_zone;
64153323Srodrigc	extern kmem_zone_t	*xfs_dabuf_zone;
65153323Srodrigc	extern kmem_zone_t	*xfs_buf_item_zone;
66153323Srodrigc
67153323Srodrigc#ifdef XFS_DABUF_DEBUG
68153323Srodrigc	extern lock_t	        xfs_dabuf_global_lock;
69153323Srodrigc#endif
70153323Srodrigc
71153323Srodrigcint
72153323Srodrigcxfs_init(void)
73153323Srodrigc{
74153323Srodrigc#if 0
75153323Srodrigc	extern kmem_zone_t	*xfs_bmap_free_item_zone;
76153323Srodrigc	extern kmem_zone_t	*xfs_btree_cur_zone;
77153323Srodrigc	extern kmem_zone_t	*xfs_trans_zone;
78153323Srodrigc	extern kmem_zone_t	*xfs_buf_item_zone;
79153323Srodrigc	extern kmem_zone_t	*xfs_dabuf_zone;
80153323Srodrigc#endif
81153323Srodrigc#ifdef XFS_DABUF_DEBUG
82153323Srodrigc	spinlock_init(&xfs_dabuf_global_lock, "xfsda");
83153323Srodrigc#endif
84153323Srodrigc	/*
85153323Srodrigc	 * Initialize all of the zone allocators we use.
86153323Srodrigc	 */
87153323Srodrigc	xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
88153323Srodrigc						 "xfs_bmap_free_item");
89153323Srodrigc	xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
90153323Srodrigc					    "xfs_btree_cur");
91153323Srodrigc	xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
92153323Srodrigc	xfs_da_state_zone =
93153323Srodrigc		kmem_zone_init(sizeof(xfs_da_state_t), "xfs_da_state");
94153323Srodrigc	xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
95159451Srodrigc	xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
96159451Srodrigc	xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
97153323Srodrigc
98153323Srodrigc	/*
99153323Srodrigc	 * The size of the zone allocated buf log item is the maximum
100153323Srodrigc	 * size possible under XFS.  This wastes a little bit of memory,
101153323Srodrigc	 * but it is much faster.
102153323Srodrigc	 */
103153323Srodrigc	xfs_buf_item_zone =
104153323Srodrigc		kmem_zone_init((sizeof(xfs_buf_log_item_t) +
105153323Srodrigc				(((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
106153323Srodrigc				  NBWORD) * sizeof(int))),
107153323Srodrigc			       "xfs_buf_item");
108159451Srodrigc	xfs_efd_zone =
109159451Srodrigc		kmem_zone_init((sizeof(xfs_efd_log_item_t) +
110159451Srodrigc			       ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
111159451Srodrigc				 sizeof(xfs_extent_t))),
112153323Srodrigc				      "xfs_efd_item");
113159451Srodrigc	xfs_efi_zone =
114159451Srodrigc		kmem_zone_init((sizeof(xfs_efi_log_item_t) +
115159451Srodrigc			       ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
116159451Srodrigc				 sizeof(xfs_extent_t))),
117153323Srodrigc				      "xfs_efi_item");
118153323Srodrigc
119153323Srodrigc	/*
120159451Srodrigc	 * These zones warrant special memory allocator hints
121159451Srodrigc	 */
122159451Srodrigc	xfs_inode_zone =
123159451Srodrigc		kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
124159451Srodrigc					KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
125159451Srodrigc					KM_ZONE_SPREAD, NULL);
126159451Srodrigc	xfs_ili_zone =
127159451Srodrigc		kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
128159451Srodrigc					KM_ZONE_SPREAD, NULL);
129159451Srodrigc	xfs_chashlist_zone =
130159451Srodrigc		kmem_zone_init_flags(sizeof(xfs_chashlist_t), "xfs_chashlist",
131159451Srodrigc					KM_ZONE_SPREAD, NULL);
132159451Srodrigc
133159451Srodrigc	/*
134153323Srodrigc	 * Allocate global trace buffers.
135153323Srodrigc	 */
136153323Srodrigc#ifdef XFS_ALLOC_TRACE
137153323Srodrigc	xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_SLEEP);
138153323Srodrigc#endif
139153323Srodrigc#ifdef XFS_BMAP_TRACE
140153323Srodrigc	xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_SLEEP);
141153323Srodrigc#endif
142153323Srodrigc#ifdef XFS_BMBT_TRACE
143153323Srodrigc	xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_SLEEP);
144153323Srodrigc#endif
145153323Srodrigc#ifdef XFS_DIR_TRACE
146153323Srodrigc	xfs_dir_trace_buf = ktrace_alloc(XFS_DIR_TRACE_SIZE, KM_SLEEP);
147153323Srodrigc#endif
148153323Srodrigc#ifdef XFS_ATTR_TRACE
149153323Srodrigc	xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_SLEEP);
150153323Srodrigc#endif
151153323Srodrigc#ifdef XFS_DIR2_TRACE
152153323Srodrigc	xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_SLEEP);
153153323Srodrigc#endif
154153323Srodrigc
155153323Srodrigc	xfs_dir_startup();
156153323Srodrigc
157153323Srodrigc#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
158153323Srodrigc	xfs_error_test_init();
159153323Srodrigc#endif /* DEBUG || INDUCE_IO_ERROR */
160153323Srodrigc
161153323Srodrigc	xfs_refcache_init();
162153323Srodrigc	xfs_init_procfs();
163153323Srodrigc	xfs_sysctl_register();
164153323Srodrigc	return 0;
165153323Srodrigc}
166153323Srodrigc
167153323Srodrigcvoid
168153323Srodrigcxfs_cleanup(void)
169153323Srodrigc{
170153323Srodrigc#if 0
171153323Srodrigc	extern kmem_zone_t	*xfs_bmap_free_item_zone;
172153323Srodrigc	extern kmem_zone_t	*xfs_btree_cur_zone;
173153323Srodrigc	extern kmem_zone_t	*xfs_inode_zone;
174153323Srodrigc	extern kmem_zone_t	*xfs_trans_zone;
175153323Srodrigc	extern kmem_zone_t	*xfs_da_state_zone;
176153323Srodrigc	extern kmem_zone_t	*xfs_dabuf_zone;
177153323Srodrigc	extern kmem_zone_t	*xfs_efd_zone;
178153323Srodrigc	extern kmem_zone_t	*xfs_efi_zone;
179153323Srodrigc	extern kmem_zone_t	*xfs_buf_item_zone;
180153323Srodrigc	extern kmem_zone_t	*xfs_chashlist_zone;
181153323Srodrigc#endif
182153323Srodrigc
183153323Srodrigc	xfs_cleanup_procfs();
184153323Srodrigc	xfs_sysctl_unregister();
185153323Srodrigc	xfs_refcache_destroy();
186159451Srodrigc	xfs_acl_zone_destroy(xfs_acl_zone);
187153323Srodrigc
188153323Srodrigc#ifdef XFS_DIR2_TRACE
189153323Srodrigc	ktrace_free(xfs_dir2_trace_buf);
190153323Srodrigc#endif
191153323Srodrigc#ifdef XFS_ATTR_TRACE
192153323Srodrigc	ktrace_free(xfs_attr_trace_buf);
193153323Srodrigc#endif
194153323Srodrigc#ifdef XFS_DIR_TRACE
195153323Srodrigc	ktrace_free(xfs_dir_trace_buf);
196153323Srodrigc#endif
197153323Srodrigc#ifdef XFS_BMBT_TRACE
198153323Srodrigc	ktrace_free(xfs_bmbt_trace_buf);
199153323Srodrigc#endif
200153323Srodrigc#ifdef XFS_BMAP_TRACE
201153323Srodrigc	ktrace_free(xfs_bmap_trace_buf);
202153323Srodrigc#endif
203153323Srodrigc#ifdef XFS_ALLOC_TRACE
204153323Srodrigc	ktrace_free(xfs_alloc_trace_buf);
205153323Srodrigc#endif
206153323Srodrigc
207159451Srodrigc	kmem_zone_destroy(xfs_bmap_free_item_zone);
208159451Srodrigc	kmem_zone_destroy(xfs_btree_cur_zone);
209159451Srodrigc	kmem_zone_destroy(xfs_inode_zone);
210159451Srodrigc	kmem_zone_destroy(xfs_trans_zone);
211159451Srodrigc	kmem_zone_destroy(xfs_da_state_zone);
212159451Srodrigc	kmem_zone_destroy(xfs_dabuf_zone);
213159451Srodrigc	kmem_zone_destroy(xfs_buf_item_zone);
214159451Srodrigc	kmem_zone_destroy(xfs_efd_zone);
215159451Srodrigc	kmem_zone_destroy(xfs_efi_zone);
216159451Srodrigc	kmem_zone_destroy(xfs_ifork_zone);
217159451Srodrigc	kmem_zone_destroy(xfs_ili_zone);
218159451Srodrigc	kmem_zone_destroy(xfs_chashlist_zone);
219153323Srodrigc}
220153323Srodrigc
221153323Srodrigc/*
222153323Srodrigc * xfs_start_flags
223153323Srodrigc *
224153323Srodrigc * This function fills in xfs_mount_t fields based on mount args.
225153323Srodrigc * Note: the superblock has _not_ yet been read in.
226153323Srodrigc */
227153323SrodrigcSTATIC int
228153323Srodrigcxfs_start_flags(
229159451Srodrigc	struct xfs_vfs		*vfs,
230153323Srodrigc	struct xfs_mount_args	*ap,
231159451Srodrigc	struct xfs_mount	*mp)
232153323Srodrigc{
233153323Srodrigc	/* Values are in BBs */
234153323Srodrigc	if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
235153323Srodrigc		/*
236153323Srodrigc		 * At this point the superblock has not been read
237153323Srodrigc		 * in, therefore we do not know the block size.
238153323Srodrigc		 * Before the mount call ends we will convert
239153323Srodrigc		 * these to FSBs.
240153323Srodrigc		 */
241153323Srodrigc		mp->m_dalign = ap->sunit;
242153323Srodrigc		mp->m_swidth = ap->swidth;
243153323Srodrigc	}
244153323Srodrigc
245159451Srodrigc	if (ap->logbufs != -1 &&
246159451Srodrigc	    ap->logbufs != 0 &&
247153323Srodrigc	    (ap->logbufs < XLOG_MIN_ICLOGS ||
248153323Srodrigc	     ap->logbufs > XLOG_MAX_ICLOGS)) {
249153323Srodrigc		cmn_err(CE_WARN,
250153323Srodrigc			"XFS: invalid logbufs value: %d [not %d-%d]",
251153323Srodrigc			ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
252153323Srodrigc		return XFS_ERROR(EINVAL);
253153323Srodrigc	}
254153323Srodrigc	mp->m_logbufs = ap->logbufs;
255153323Srodrigc	if (ap->logbufsize != -1 &&
256159451Srodrigc	    ap->logbufsize !=  0 &&
257153323Srodrigc	    ap->logbufsize != 16 * 1024 &&
258153323Srodrigc	    ap->logbufsize != 32 * 1024 &&
259153323Srodrigc	    ap->logbufsize != 64 * 1024 &&
260153323Srodrigc	    ap->logbufsize != 128 * 1024 &&
261153323Srodrigc	    ap->logbufsize != 256 * 1024) {
262153323Srodrigc		cmn_err(CE_WARN,
263153323Srodrigc	"XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
264153323Srodrigc			ap->logbufsize);
265153323Srodrigc		return XFS_ERROR(EINVAL);
266153323Srodrigc	}
267159451Srodrigc	mp->m_ihsize = ap->ihashsize;
268153323Srodrigc	mp->m_logbsize = ap->logbufsize;
269153323Srodrigc	mp->m_fsname_len = strlen(ap->fsname) + 1;
270153323Srodrigc	mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
271153323Srodrigc	strcpy(mp->m_fsname, ap->fsname);
272159451Srodrigc	if (ap->rtname[0]) {
273159451Srodrigc		mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
274159451Srodrigc		strcpy(mp->m_rtname, ap->rtname);
275159451Srodrigc	}
276159451Srodrigc	if (ap->logname[0]) {
277159451Srodrigc		mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
278159451Srodrigc		strcpy(mp->m_logname, ap->logname);
279159451Srodrigc	}
280153323Srodrigc
281153323Srodrigc	if (ap->flags & XFSMNT_WSYNC)
282153323Srodrigc		mp->m_flags |= XFS_MOUNT_WSYNC;
283153323Srodrigc#if XFS_BIG_INUMS
284153323Srodrigc	if (ap->flags & XFSMNT_INO64) {
285153323Srodrigc		mp->m_flags |= XFS_MOUNT_INO64;
286153323Srodrigc		mp->m_inoadd = XFS_INO64_OFFSET;
287153323Srodrigc	}
288153323Srodrigc#endif
289153323Srodrigc	if (ap->flags & XFSMNT_RETERR)
290153323Srodrigc		mp->m_flags |= XFS_MOUNT_RETERR;
291153323Srodrigc	if (ap->flags & XFSMNT_NOALIGN)
292153323Srodrigc		mp->m_flags |= XFS_MOUNT_NOALIGN;
293159451Srodrigc	if (ap->flags & XFSMNT_SWALLOC)
294159451Srodrigc		mp->m_flags |= XFS_MOUNT_SWALLOC;
295153323Srodrigc	if (ap->flags & XFSMNT_OSYNCISOSYNC)
296153323Srodrigc		mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
297153323Srodrigc	if (ap->flags & XFSMNT_32BITINODES)
298159451Srodrigc		mp->m_flags |= XFS_MOUNT_32BITINODES;
299153323Srodrigc
300153323Srodrigc	if (ap->flags & XFSMNT_IOSIZE) {
301153323Srodrigc		if (ap->iosizelog > XFS_MAX_IO_LOG ||
302153323Srodrigc		    ap->iosizelog < XFS_MIN_IO_LOG) {
303153323Srodrigc			cmn_err(CE_WARN,
304153323Srodrigc		"XFS: invalid log iosize: %d [not %d-%d]",
305153323Srodrigc				ap->iosizelog, XFS_MIN_IO_LOG,
306153323Srodrigc				XFS_MAX_IO_LOG);
307153323Srodrigc			return XFS_ERROR(EINVAL);
308153323Srodrigc		}
309153323Srodrigc
310153323Srodrigc		mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
311153323Srodrigc		mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
312153323Srodrigc	}
313159451Srodrigc
314159451Srodrigc	if (ap->flags & XFSMNT_IHASHSIZE)
315159451Srodrigc		mp->m_flags |= XFS_MOUNT_IHASHSIZE;
316153323Srodrigc	if (ap->flags & XFSMNT_IDELETE)
317153323Srodrigc		mp->m_flags |= XFS_MOUNT_IDELETE;
318159451Srodrigc	if (ap->flags & XFSMNT_DIRSYNC)
319159451Srodrigc		mp->m_flags |= XFS_MOUNT_DIRSYNC;
320159451Srodrigc	if (ap->flags & XFSMNT_ATTR2)
321159451Srodrigc		mp->m_flags |= XFS_MOUNT_ATTR2;
322153323Srodrigc
323159451Srodrigc	if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
324159451Srodrigc		mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
325159451Srodrigc
326153323Srodrigc	/*
327153323Srodrigc	 * no recovery flag requires a read-only mount
328153323Srodrigc	 */
329153323Srodrigc	if (ap->flags & XFSMNT_NORECOVERY) {
330159451Srodrigc		if (!(vfs->vfs_flag & VFS_RDONLY)) {
331153323Srodrigc			cmn_err(CE_WARN,
332153323Srodrigc	"XFS: tried to mount a FS read-write without recovery!");
333153323Srodrigc			return XFS_ERROR(EINVAL);
334153323Srodrigc		}
335153323Srodrigc		mp->m_flags |= XFS_MOUNT_NORECOVERY;
336153323Srodrigc	}
337153323Srodrigc
338153323Srodrigc	if (ap->flags & XFSMNT_NOUUID)
339153323Srodrigc		mp->m_flags |= XFS_MOUNT_NOUUID;
340159451Srodrigc	if (ap->flags & XFSMNT_BARRIER)
341159451Srodrigc		mp->m_flags |= XFS_MOUNT_BARRIER;
342159451Srodrigc	else
343159451Srodrigc		mp->m_flags &= ~XFS_MOUNT_BARRIER;
344153323Srodrigc
345153323Srodrigc	return 0;
346153323Srodrigc}
347153323Srodrigc
348153323Srodrigc/*
349153323Srodrigc * This function fills in xfs_mount_t fields based on mount args.
350153323Srodrigc * Note: the superblock _has_ now been read in.
351153323Srodrigc */
352153323SrodrigcSTATIC int
353153323Srodrigcxfs_finish_flags(
354159451Srodrigc	struct xfs_vfs		*vfs,
355153323Srodrigc	struct xfs_mount_args	*ap,
356159451Srodrigc	struct xfs_mount	*mp)
357153323Srodrigc{
358159451Srodrigc	int			ronly = (vfs->vfs_flag & VFS_RDONLY);
359159451Srodrigc
360153323Srodrigc	/* Fail a mount where the logbuf is smaller then the log stripe */
361153323Srodrigc	if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
362159451Srodrigc		if ((ap->logbufsize <= 0) &&
363153323Srodrigc		    (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
364153323Srodrigc			mp->m_logbsize = mp->m_sb.sb_logsunit;
365159451Srodrigc		} else if (ap->logbufsize > 0 &&
366159451Srodrigc			   ap->logbufsize < mp->m_sb.sb_logsunit) {
367153323Srodrigc			cmn_err(CE_WARN,
368153323Srodrigc	"XFS: logbuf size must be greater than or equal to log stripe size");
369153323Srodrigc			return XFS_ERROR(EINVAL);
370153323Srodrigc		}
371153323Srodrigc	} else {
372153323Srodrigc		/* Fail a mount if the logbuf is larger than 32K */
373153323Srodrigc		if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
374153323Srodrigc			cmn_err(CE_WARN,
375153323Srodrigc	"XFS: logbuf size for version 1 logs must be 16K or 32K");
376153323Srodrigc			return XFS_ERROR(EINVAL);
377153323Srodrigc		}
378153323Srodrigc	}
379153323Srodrigc
380159451Srodrigc	if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
381159451Srodrigc		mp->m_flags |= XFS_MOUNT_ATTR2;
382159451Srodrigc	}
383159451Srodrigc
384153323Srodrigc	/*
385153323Srodrigc	 * prohibit r/w mounts of read-only filesystems
386153323Srodrigc	 */
387153323Srodrigc	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
388153323Srodrigc		cmn_err(CE_WARN,
389153323Srodrigc	"XFS: cannot mount a read-only filesystem as read-write");
390153323Srodrigc		return XFS_ERROR(EROFS);
391153323Srodrigc	}
392153323Srodrigc
393153323Srodrigc	/*
394153323Srodrigc	 * check for shared mount.
395153323Srodrigc	 */
396153323Srodrigc	if (ap->flags & XFSMNT_SHARED) {
397153323Srodrigc		if (!XFS_SB_VERSION_HASSHARED(&mp->m_sb))
398153323Srodrigc			return XFS_ERROR(EINVAL);
399153323Srodrigc
400153323Srodrigc		/*
401153323Srodrigc		 * For IRIX 6.5, shared mounts must have the shared
402153323Srodrigc		 * version bit set, have the persistent readonly
403153323Srodrigc		 * field set, must be version 0 and can only be mounted
404153323Srodrigc		 * read-only.
405153323Srodrigc		 */
406153323Srodrigc		if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
407153323Srodrigc		     (mp->m_sb.sb_shared_vn != 0))
408153323Srodrigc			return XFS_ERROR(EINVAL);
409153323Srodrigc
410153323Srodrigc		mp->m_flags |= XFS_MOUNT_SHARED;
411153323Srodrigc
412153323Srodrigc		/*
413153323Srodrigc		 * Shared XFS V0 can't deal with DMI.  Return EINVAL.
414153323Srodrigc		 */
415153323Srodrigc		if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
416153323Srodrigc			return XFS_ERROR(EINVAL);
417153323Srodrigc	}
418153323Srodrigc
419153323Srodrigc	return 0;
420153323Srodrigc}
421153323Srodrigc
422153323Srodrigc/*
423153323Srodrigc * xfs_mount
424153323Srodrigc *
425153323Srodrigc * The file system configurations are:
426153323Srodrigc *	(1) device (partition) with data and internal log
427153323Srodrigc *	(2) logical volume with data and log subvolumes.
428153323Srodrigc *	(3) logical volume with data, log, and realtime subvolumes.
429153323Srodrigc *
430153323Srodrigc * We only have to handle opening the log and realtime volumes here if
431153323Srodrigc * they are present.  The data subvolume has already been opened by
432153323Srodrigc * get_sb_bdev() and is stored in vfsp->vfs_super->s_bdev.
433153323Srodrigc */
434153323SrodrigcSTATIC int
435153323Srodrigcxfs_mount(
436153323Srodrigc	struct bhv_desc		*bhvp,
437153323Srodrigc	struct xfs_mount_args	*args,
438153323Srodrigc	cred_t			*credp)
439153323Srodrigc{
440153323Srodrigc	struct xfs_vfs		*vfsp = bhvtovfs(bhvp);
441153323Srodrigc	struct bhv_desc		*p;
442153323Srodrigc	struct xfs_mount	*mp = XFS_BHVTOM(bhvp);
443153323Srodrigc	struct vnode		*ddev, *logdev, *rtdev;
444153323Srodrigc	int			flags = 0, error;
445153323Srodrigc
446153323Srodrigc	ddev = logdev = rtdev = NULL;
447153323Srodrigc
448153323Srodrigc	error = xfs_blkdev_get(mp, args->fsname, &ddev);
449153323Srodrigc	if (error)
450153323Srodrigc		return error;
451153323Srodrigc
452153323Srodrigc	/*
453159451Srodrigc	 * Setup xfs_mount function vectors from available behaviors
454159451Srodrigc	 */
455159451Srodrigc	p = vfs_bhv_lookup(vfsp, VFS_POSITION_DM);
456159451Srodrigc	mp->m_dm_ops = p ? *(xfs_dmops_t *) vfs_bhv_custom(p) : xfs_dmcore_stub;
457159451Srodrigc	p = vfs_bhv_lookup(vfsp, VFS_POSITION_QM);
458159451Srodrigc	mp->m_qm_ops = p ? *(xfs_qmops_t *) vfs_bhv_custom(p) : xfs_qmcore_stub;
459159451Srodrigc	p = vfs_bhv_lookup(vfsp, VFS_POSITION_IO);
460159451Srodrigc	mp->m_io_ops = p ? *(xfs_ioops_t *) vfs_bhv_custom(p) : xfs_iocore_xfs;
461159451Srodrigc
462159451Srodrigc	if (args->flags & XFSMNT_QUIET)
463159451Srodrigc		flags |= XFS_MFSI_QUIET;
464159451Srodrigc
465159451Srodrigc	/*
466153323Srodrigc	 * Open real time and log devices - order is important.
467153323Srodrigc	 */
468153323Srodrigc	if (args->logname[0]) {
469153323Srodrigc		error = xfs_blkdev_get(mp, args->logname, &logdev);
470153323Srodrigc		if (error) {
471153323Srodrigc			xfs_blkdev_put(ddev);
472153323Srodrigc			return error;
473153323Srodrigc		}
474153323Srodrigc	}
475153323Srodrigc	if (args->rtname[0]) {
476153323Srodrigc		error = xfs_blkdev_get(mp, args->rtname, &rtdev);
477153323Srodrigc		if (error) {
478153323Srodrigc			xfs_blkdev_put(logdev);
479153323Srodrigc			xfs_blkdev_put(ddev);
480153323Srodrigc			return error;
481153323Srodrigc		}
482153323Srodrigc
483153323Srodrigc		if (rtdev == ddev || rtdev == logdev) {
484153323Srodrigc			cmn_err(CE_WARN,
485153323Srodrigc	"XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
486153323Srodrigc			xfs_blkdev_put(logdev);
487153323Srodrigc			xfs_blkdev_put(rtdev);
488153323Srodrigc			xfs_blkdev_put(ddev);
489153323Srodrigc			return EINVAL;
490153323Srodrigc		}
491153323Srodrigc	}
492153323Srodrigc
493153323Srodrigc	/*
494153323Srodrigc	 * Setup xfs_mount buffer target pointers
495153323Srodrigc	 */
496159451Srodrigc	error = ENOMEM;
497159451Srodrigc	mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
498159451Srodrigc	if (!mp->m_ddev_targp) {
499159451Srodrigc		xfs_blkdev_put(logdev);
500159451Srodrigc		xfs_blkdev_put(rtdev);
501159451Srodrigc		return error;
502159451Srodrigc	}
503159451Srodrigc	if (rtdev) {
504159451Srodrigc		mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
505159451Srodrigc		if (!mp->m_rtdev_targp)
506159451Srodrigc			goto error0;
507159451Srodrigc	}
508153323Srodrigc	mp->m_logdev_targp = (logdev && logdev != ddev) ?
509159451Srodrigc				xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp;
510159451Srodrigc	if (!mp->m_logdev_targp)
511159451Srodrigc		goto error0;
512153323Srodrigc
513153323Srodrigc	/*
514153323Srodrigc	 * Setup flags based on mount(2) options and then the superblock
515153323Srodrigc	 */
516159451Srodrigc	error = xfs_start_flags(vfsp, args, mp);
517153323Srodrigc	if (error)
518159451Srodrigc		goto error1;
519159451Srodrigc	error = xfs_readsb(mp, flags);
520153323Srodrigc	if (error)
521159451Srodrigc		goto error1;
522159451Srodrigc	error = xfs_finish_flags(vfsp, args, mp);
523159451Srodrigc	if (error)
524159451Srodrigc		goto error2;
525153323Srodrigc
526153323Srodrigc	/*
527153323Srodrigc	 * Setup xfs_mount buffer target pointers based on superblock
528153323Srodrigc	 */
529159451Srodrigc	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
530159451Srodrigc				    mp->m_sb.sb_sectsize);
531159451Srodrigc	if (!error && logdev && logdev != ddev) {
532153323Srodrigc		unsigned int	log_sector_size = BBSIZE;
533153323Srodrigc
534153323Srodrigc		if (XFS_SB_VERSION_HASSECTOR(&mp->m_sb))
535153323Srodrigc			log_sector_size = mp->m_sb.sb_logsectsize;
536159451Srodrigc		error = xfs_setsize_buftarg(mp->m_logdev_targp,
537159451Srodrigc					    mp->m_sb.sb_blocksize,
538159451Srodrigc					    log_sector_size);
539153323Srodrigc	}
540159451Srodrigc	if (!error && rtdev)
541159451Srodrigc		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
542159451Srodrigc					    mp->m_sb.sb_blocksize,
543159451Srodrigc					    mp->m_sb.sb_sectsize);
544159451Srodrigc	if (error)
545159451Srodrigc		goto error2;
546153323Srodrigc
547159451Srodrigc	if ((mp->m_flags & XFS_MOUNT_BARRIER) && !(vfsp->vfs_flag & VFS_RDONLY))
548159451Srodrigc		xfs_mountfs_check_barriers(mp);
549153323Srodrigc
550159451Srodrigc	error = XFS_IOINIT(vfsp, args, flags);
551159451Srodrigc	if (error)
552159451Srodrigc		goto error2;
553159451Srodrigc
554159451Srodrigc	return 0;
555159451Srodrigc
556159451Srodrigcerror2:
557159451Srodrigc	if (mp->m_sb_bp)
558159451Srodrigc		xfs_freesb(mp);
559159451Srodrigcerror1:
560153323Srodrigc	xfs_binval(mp->m_ddev_targp);
561159451Srodrigc	if (logdev && logdev != ddev)
562153323Srodrigc		xfs_binval(mp->m_logdev_targp);
563159451Srodrigc	if (rtdev)
564153323Srodrigc		xfs_binval(mp->m_rtdev_targp);
565159451Srodrigcerror0:
566159451Srodrigc	xfs_unmountfs_close(mp, credp);
567153323Srodrigc	return error;
568153323Srodrigc}
569153323Srodrigc
570153323SrodrigcSTATIC int
571153323Srodrigcxfs_unmount(
572153323Srodrigc	bhv_desc_t	*bdp,
573153323Srodrigc	int		flags,
574153323Srodrigc	cred_t		*credp)
575153323Srodrigc{
576153323Srodrigc	struct xfs_vfs	*vfsp = bhvtovfs(bdp);
577153323Srodrigc	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
578153323Srodrigc	xfs_inode_t	*rip;
579153323Srodrigc	xfs_vnode_t	*rvp;
580153323Srodrigc	int		unmount_event_wanted = 0;
581153323Srodrigc	int		unmount_event_flags = 0;
582153323Srodrigc	int		xfs_unmountfs_needed = 0;
583153323Srodrigc	int		error;
584153323Srodrigc
585153323Srodrigc	rip = mp->m_rootip;
586153323Srodrigc	rvp = XFS_ITOV(rip);
587153323Srodrigc
588153323Srodrigc	if (vfsp->vfs_flag & VFS_DMI) {
589159451Srodrigc		error = XFS_SEND_PREUNMOUNT(mp, vfsp,
590153323Srodrigc				rvp, DM_RIGHT_NULL, rvp, DM_RIGHT_NULL,
591153323Srodrigc				NULL, NULL, 0, 0,
592153323Srodrigc				(mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))?
593153323Srodrigc					0:DM_FLAGS_UNWANTED);
594153323Srodrigc			if (error)
595153323Srodrigc				return XFS_ERROR(error);
596153323Srodrigc		unmount_event_wanted = 1;
597153323Srodrigc		unmount_event_flags = (mp->m_dmevmask & (1<<DM_EVENT_UNMOUNT))?
598153323Srodrigc					0 : DM_FLAGS_UNWANTED;
599153323Srodrigc	}
600153323Srodrigc
601153323Srodrigc	/*
602153323Srodrigc	 * Linux (& presumably Irix) do not reach this code if
603153323Srodrigc	 * any of this FS vnodes have active references. FreeBSD
604153323Srodrigc	 * relies on FS to clean after itself.
605153323Srodrigc	 */
606159451Srodrigc	xfs_iflush_all(mp);
607153323Srodrigc
608153323Srodrigc	/*
609153323Srodrigc	 * First blow any referenced inode from this file system
610153323Srodrigc	 * out of the reference cache, and delete the timer.
611153323Srodrigc	 */
612153323Srodrigc	xfs_refcache_purge_mp(mp);
613153323Srodrigc
614153323Srodrigc	XFS_bflush(mp->m_ddev_targp);
615153323Srodrigc	error = xfs_unmount_flush(mp, 0);
616153323Srodrigc	if (error)
617153323Srodrigc		goto out;
618153323Srodrigc
619153323Srodrigc	ASSERT(vn_count(rvp) == 1);
620153323Srodrigc
621153323Srodrigc	/*
622153323Srodrigc	 * Drop the reference count
623153323Srodrigc	 */
624153323Srodrigc	VN_RELE(rvp);
625153323Srodrigc
626153323Srodrigc	/*
627153323Srodrigc	 * If we're forcing a shutdown, typically because of a media error,
628153323Srodrigc	 * we want to make sure we invalidate dirty pages that belong to
629153323Srodrigc	 * referenced vnodes as well.
630153323Srodrigc	 */
631153323Srodrigc	if (XFS_FORCED_SHUTDOWN(mp)) {
632153323Srodrigc		error = xfs_sync(&mp->m_bhv,
633153323Srodrigc			 (SYNC_WAIT | SYNC_CLOSE), credp);
634153323Srodrigc		ASSERT(error != EFSCORRUPTED);
635153323Srodrigc	}
636153323Srodrigc	xfs_unmountfs_needed = 1;
637153323Srodrigc
638153323Srodrigcout:
639153323Srodrigc	/*	Send DMAPI event, if required.
640153323Srodrigc	 *	Then do xfs_unmountfs() if needed.
641153323Srodrigc	 *	Then return error (or zero).
642153323Srodrigc	 */
643153323Srodrigc	if (unmount_event_wanted) {
644153323Srodrigc		/* Note: mp structure must still exist for
645153323Srodrigc		 * XFS_SEND_UNMOUNT() call.
646153323Srodrigc		 */
647153323Srodrigc		XFS_SEND_UNMOUNT(mp, vfsp, error == 0 ? rvp : NULL,
648153323Srodrigc			DM_RIGHT_NULL, 0, error, unmount_event_flags);
649153323Srodrigc	}
650153323Srodrigc	if (xfs_unmountfs_needed) {
651153323Srodrigc		/*
652153323Srodrigc		 * Call common unmount function to flush to disk
653153323Srodrigc		 * and free the super block buffer & mount structures.
654153323Srodrigc		 */
655153323Srodrigc		xfs_unmountfs(mp, credp);
656153323Srodrigc	}
657153323Srodrigc
658153323Srodrigc	return XFS_ERROR(error);
659153323Srodrigc}
660153323Srodrigc
661159451SrodrigcSTATIC int
662159451Srodrigcxfs_quiesce_fs(
663159451Srodrigc	xfs_mount_t		*mp)
664159451Srodrigc{
665159451Srodrigc	int			count = 0, pincount;
666153323Srodrigc
667159451Srodrigc	xfs_refcache_purge_mp(mp);
668159451Srodrigc	xfs_flush_buftarg(mp->m_ddev_targp, 0);
669159451Srodrigc	xfs_finish_reclaim_all(mp, 0);
670159451Srodrigc
671159451Srodrigc	/* This loop must run at least twice.
672159451Srodrigc	 * The first instance of the loop will flush
673159451Srodrigc	 * most meta data but that will generate more
674159451Srodrigc	 * meta data (typically directory updates).
675159451Srodrigc	 * Which then must be flushed and logged before
676159451Srodrigc	 * we can write the unmount record.
677159451Srodrigc	 */
678159451Srodrigc	do {
679159451Srodrigc		xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, 0, NULL);
680159451Srodrigc		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
681159451Srodrigc		if (!pincount) {
682159451Srodrigc			delay(50);
683159451Srodrigc			count++;
684159451Srodrigc		}
685159451Srodrigc	} while (count < 2);
686159451Srodrigc
687159451Srodrigc	return 0;
688159451Srodrigc}
689159451Srodrigc
/*
 * XXXKAN: stand-in for pagebuf_delwri_flush().
 *
 * Prints a "pagebuf_delwri_flush NI" notice and, when the third
 * argument is a non-null pointer, stores 0 through it as an int.  The
 * first two arguments are ignored.  The third argument is evaluated
 * exactly once, so it is safe to pass an expression with side effects.
 */
#define pagebuf_delwri_flush(a,b,c)				\
	do {							\
		int *pbdf_result_ = (int *)(c);			\
		printf("pagebuf_delwri_flush NI\n");		\
		if (pbdf_result_)				\
			*pbdf_result_ = 0;			\
	} while(0)
696153323Srodrigc
697153323SrodrigcSTATIC int
698153323Srodrigcxfs_mntupdate(
699153323Srodrigc	bhv_desc_t			*bdp,
700153323Srodrigc	int				*flags,
701153323Srodrigc	struct xfs_mount_args		*args)
702153323Srodrigc{
703153323Srodrigc	struct xfs_vfs	*vfsp = bhvtovfs(bdp);
704153323Srodrigc	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
705159451Srodrigc	int		error;
706153323Srodrigc
707159451Srodrigc#ifdef RMC
708159451Srodrigc	if (!(*flags & MS_RDONLY)) {			/* rw/ro -> rw */
709159451Srodrigc#endif
710159451Srodrigc	  if (!(*flags & VFS_RDONLY)) {			/* rw/ro -> rw */
711159451Srodrigc		if (vfsp->vfs_flag & VFS_RDONLY)
712159451Srodrigc			vfsp->vfs_flag &= ~VFS_RDONLY;
713159451Srodrigc		if (args->flags & XFSMNT_BARRIER) {
714159451Srodrigc			mp->m_flags |= XFS_MOUNT_BARRIER;
715159451Srodrigc			xfs_mountfs_check_barriers(mp);
716159451Srodrigc		} else {
717159451Srodrigc			mp->m_flags &= ~XFS_MOUNT_BARRIER;
718159451Srodrigc		}
719159451Srodrigc	} else if (!(vfsp->vfs_flag & VFS_RDONLY)) {	/* rw -> ro */
720153323Srodrigc		XVFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error);
721159451Srodrigc		xfs_quiesce_fs(mp);
722153323Srodrigc		xfs_log_unmount_write(mp);
723153323Srodrigc		xfs_unmountfs_writesb(mp);
724153323Srodrigc		vfsp->vfs_flag |= VFS_RDONLY;
725153323Srodrigc	}
726153323Srodrigc	return 0;
727153323Srodrigc}
728153323Srodrigc
/*
 * xfs_unmount_flush implements a set of flush operations on special
 * inodes.  They are kept as a separate set of operations so that
 * they can also be called as part of the relocation process.
 */
734153323Srodrigcint
735153323Srodrigcxfs_unmount_flush(
736153323Srodrigc	xfs_mount_t	*mp,		/* Mount structure we are getting
737153323Srodrigc					   rid of. */
738153323Srodrigc	int             relocation)	/* Called from vfs relocation. */
739153323Srodrigc{
740153323Srodrigc	xfs_inode_t	*rip = mp->m_rootip;
741153323Srodrigc	xfs_inode_t	*rbmip;
742153323Srodrigc	xfs_inode_t	*rsumip = NULL;
743159451Srodrigc	xfs_vnode_t	*rvp = XFS_ITOV_NULL(rip);
744153323Srodrigc	int		error;
745153323Srodrigc
746159451Srodrigc	if (rvp == NULL)
747159451Srodrigc		return (0);
748153323Srodrigc	xfs_ilock(rip, XFS_ILOCK_EXCL);
749153323Srodrigc	xfs_iflock(rip);
750153323Srodrigc
751153323Srodrigc	/*
752153323Srodrigc	 * Flush out the real time inodes.
753153323Srodrigc	 */
754153323Srodrigc	if ((rbmip = mp->m_rbmip) != NULL) {
755153323Srodrigc		xfs_ilock(rbmip, XFS_ILOCK_EXCL);
756153323Srodrigc		xfs_iflock(rbmip);
757153323Srodrigc		error = xfs_iflush(rbmip, XFS_IFLUSH_SYNC);
758153323Srodrigc		xfs_iunlock(rbmip, XFS_ILOCK_EXCL);
759153323Srodrigc
760153323Srodrigc		if (error == EFSCORRUPTED)
761153323Srodrigc			goto fscorrupt_out;
762153323Srodrigc
763153323Srodrigc		ASSERT(vn_count(XFS_ITOV(rbmip)) == 1);
764153323Srodrigc
765153323Srodrigc		rsumip = mp->m_rsumip;
766153323Srodrigc		xfs_ilock(rsumip, XFS_ILOCK_EXCL);
767153323Srodrigc		xfs_iflock(rsumip);
768153323Srodrigc		error = xfs_iflush(rsumip, XFS_IFLUSH_SYNC);
769153323Srodrigc		xfs_iunlock(rsumip, XFS_ILOCK_EXCL);
770153323Srodrigc
771153323Srodrigc		if (error == EFSCORRUPTED)
772153323Srodrigc			goto fscorrupt_out;
773153323Srodrigc
774153323Srodrigc		ASSERT(vn_count(XFS_ITOV(rsumip)) == 1);
775153323Srodrigc	}
776153323Srodrigc
777153323Srodrigc	/*
778153323Srodrigc	 * Synchronously flush root inode to disk
779153323Srodrigc	 */
780153323Srodrigc	error = xfs_iflush(rip, XFS_IFLUSH_SYNC);
781153323Srodrigc	if (error == EFSCORRUPTED)
782153323Srodrigc		goto fscorrupt_out2;
783153323Srodrigc
784153323Srodrigc	if (vn_count(rvp) != 1 && !relocation) {
785153323Srodrigc		xfs_iunlock(rip, XFS_ILOCK_EXCL);
786153323Srodrigc		return XFS_ERROR(EBUSY);
787153323Srodrigc	}
788153323Srodrigc
789153323Srodrigc	/*
790153323Srodrigc	 * Release dquot that rootinode, rbmino and rsumino might be holding,
791153323Srodrigc	 * flush and purge the quota inodes.
792153323Srodrigc	 */
793153323Srodrigc	error = XFS_QM_UNMOUNT(mp);
794153323Srodrigc	if (error == EFSCORRUPTED)
795153323Srodrigc		goto fscorrupt_out2;
796153323Srodrigc
797153323Srodrigc	if (rbmip) {
798153323Srodrigc		VN_RELE(XFS_ITOV(rbmip));
799153323Srodrigc		VN_RELE(XFS_ITOV(rsumip));
800153323Srodrigc	}
801153323Srodrigc
802153323Srodrigc	xfs_iunlock(rip, XFS_ILOCK_EXCL);
803153323Srodrigc	return 0;
804153323Srodrigc
805153323Srodrigcfscorrupt_out:
806153323Srodrigc	xfs_ifunlock(rip);
807153323Srodrigc
808153323Srodrigcfscorrupt_out2:
809153323Srodrigc	xfs_iunlock(rip, XFS_ILOCK_EXCL);
810153323Srodrigc
811153323Srodrigc	return XFS_ERROR(EFSCORRUPTED);
812153323Srodrigc}
813153323Srodrigc
814153323Srodrigc/*
815153323Srodrigc * xfs_root extracts the root vnode from a vfs.
816153323Srodrigc *
817153323Srodrigc * vfsp -- the vfs struct for the desired file system
818153323Srodrigc * vpp  -- address of the caller's vnode pointer which should be
819153323Srodrigc *         set to the desired fs root vnode
820153323Srodrigc */
821153323SrodrigcSTATIC int
822153323Srodrigcxfs_root(
823153323Srodrigc	bhv_desc_t	*bdp,
824153323Srodrigc	xfs_vnode_t	**vpp)
825153323Srodrigc{
826153323Srodrigc	xfs_vnode_t	*vp;
827153323Srodrigc
828153323Srodrigc	vp = XFS_ITOV((XFS_BHVTOM(bdp))->m_rootip);
829153323Srodrigc	VN_HOLD(vp);
830153323Srodrigc	*vpp = vp;
831153323Srodrigc	return 0;
832153323Srodrigc}
833153323Srodrigc
834153323Srodrigc/*
835153323Srodrigc * xfs_statvfs
836153323Srodrigc *
837153323Srodrigc * Fill in the statvfs structure for the given file system.  We use
838153323Srodrigc * the superblock lock in the mount structure to ensure a consistent
839153323Srodrigc * snapshot of the counters returned.
840153323Srodrigc */
841153323SrodrigcSTATIC int
842153323Srodrigcxfs_statvfs(
843153323Srodrigc	bhv_desc_t	*bdp,
844153323Srodrigc	xfs_statfs_t	*statp,
845153323Srodrigc	xfs_vnode_t	*vp)
846153323Srodrigc{
847153323Srodrigc	__uint64_t	fakeinos;
848153323Srodrigc	xfs_extlen_t	lsize;
849153323Srodrigc	xfs_mount_t	*mp;
850153323Srodrigc	xfs_sb_t	*sbp;
851153323Srodrigc	unsigned long	s;
852153323Srodrigc
853153323Srodrigc	mp = XFS_BHVTOM(bdp);
854153323Srodrigc	sbp = &(mp->m_sb);
855153323Srodrigc
856153323Srodrigc	statp->f_type = XFS_SB_MAGIC;
857153323Srodrigc
858159451Srodrigc	xfs_icsb_sync_counters_lazy(mp);
859153323Srodrigc	s = XFS_SB_LOCK(mp);
860153323Srodrigc	statp->f_bsize = sbp->sb_blocksize;
861153323Srodrigc	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
862153323Srodrigc	statp->f_blocks = sbp->sb_dblocks - lsize;
863153323Srodrigc	statp->f_bfree = statp->f_bavail = sbp->sb_fdblocks;
864153323Srodrigc	fakeinos = statp->f_bfree << sbp->sb_inopblog;
865153323Srodrigc#if XFS_BIG_INUMS
866153323Srodrigc	fakeinos += mp->m_inoadd;
867153323Srodrigc#endif
868153323Srodrigc	statp->f_files =
869153323Srodrigc	    MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
870153323Srodrigc	if (mp->m_maxicount)
871153323Srodrigc#if XFS_BIG_INUMS
872153323Srodrigc		if (!mp->m_inoadd)
873153323Srodrigc#endif
874159451Srodrigc			statp->f_files = min_t(typeof(statp->f_files),
875159451Srodrigc						statp->f_files,
876159451Srodrigc						mp->m_maxicount);
877153323Srodrigc	statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
878153323Srodrigc	XFS_SB_UNLOCK(mp, s);
879153323Srodrigc
880159451Srodrigc	xfs_statvfs_fsid(statp, mp);
881153323Srodrigc	return 0;
882153323Srodrigc}
883153323Srodrigc
884153323Srodrigc
885153323Srodrigc/*
886153323Srodrigc * xfs_sync flushes any pending I/O to file system vfsp.
887153323Srodrigc *
888153323Srodrigc * This routine is called by vfs_sync() to make sure that things make it
889153323Srodrigc * out to disk eventually, on sync() system calls to flush out everything,
890153323Srodrigc * and when the file system is unmounted.  For the vfs_sync() case, all
891153323Srodrigc * we really need to do is sync out the log to make all of our meta-data
892153323Srodrigc * updates permanent (except for timestamps).  For calls from pflushd(),
893153323Srodrigc * dirty pages are kept moving by calling pdflush() on the inodes
894153323Srodrigc * containing them.  We also flush the inodes that we can lock without
895153323Srodrigc * sleeping and the superblock if we can lock it without sleeping from
896153323Srodrigc * vfs_sync() so that items at the tail of the log are always moving out.
897153323Srodrigc *
898153323Srodrigc * Flags:
899153323Srodrigc *      SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want
900153323Srodrigc *		       to sleep if we can help it.  All we really need
901153323Srodrigc *		       to do is ensure that the log is synced at least
902153323Srodrigc *		       periodically.  We also push the inodes and
903153323Srodrigc *		       superblock if we can lock them without sleeping
904153323Srodrigc *			and they are not pinned.
905153323Srodrigc *      SYNC_ATTR    - We need to flush the inodes.  If SYNC_BDFLUSH is not
906153323Srodrigc *		       set, then we really want to lock each inode and flush
907153323Srodrigc *		       it.
908153323Srodrigc *      SYNC_WAIT    - All the flushes that take place in this call should
909153323Srodrigc *		       be synchronous.
910153323Srodrigc *      SYNC_DELWRI  - This tells us to push dirty pages associated with
911153323Srodrigc *		       inodes.  SYNC_WAIT and SYNC_BDFLUSH are used to
912153323Srodrigc *		       determine if they should be flushed sync, async, or
913153323Srodrigc *		       delwri.
914153323Srodrigc *      SYNC_CLOSE   - This flag is passed when the system is being
915159451Srodrigc *		       unmounted.  We should sync and invalidate everything.
916153323Srodrigc *      SYNC_FSDATA  - This indicates that the caller would like to make
917153323Srodrigc *		       sure the superblock is safe on disk.  We can ensure
918159451Srodrigc *		       this by simply making sure the log gets flushed
919153323Srodrigc *		       if SYNC_BDFLUSH is set, and by actually writing it
920153323Srodrigc *		       out otherwise.
921153323Srodrigc *
922153323Srodrigc */
923153323Srodrigc/*ARGSUSED*/
924153323SrodrigcSTATIC int
925153323Srodrigcxfs_sync(
926153323Srodrigc	bhv_desc_t	*bdp,
927153323Srodrigc	int		flags,
928153323Srodrigc	cred_t		*credp)
929153323Srodrigc{
930159451Srodrigc	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
931153323Srodrigc
932159451Srodrigc	if (unlikely(flags == SYNC_QUIESCE))
933159451Srodrigc		return xfs_quiesce_fs(mp);
934159451Srodrigc	else
935159451Srodrigc		return xfs_syncsub(mp, flags, 0, NULL);
936153323Srodrigc}
937153323Srodrigc
938153323Srodrigc/*
 * xfs_sync_inodes
 *
 * Walk the circular list of in-core inodes hanging off mp->m_inodes and
 * flush each one as requested by the SYNC_* bits in flags (see the flag
 * descriptions in the comment above xfs_sync).
 *
 * The list is walked with the mount inode lock held.  Whenever that lock
 * has to be dropped, a marker record (ipointer, an xfs_iptr_t allocated
 * here) is linked into the list after the current inode by
 * IPOINTER_INSERT; IPOINTER_REMOVE later unlinks the marker and yields
 * the inode that follows it, which is where the walk resumes.
 *
 * mp       -- mount whose inodes are to be synced
 * flags    -- SYNC_* request bits
 * xflags   -- not referenced anywhere in this function
 * bypassed -- optional (may be NULL); zeroed on entry and incremented for
 *             every inode whose flush lock could not be taken in the
 *             non-SYNC_BDFLUSH, non-SYNC_WAIT attribute flush path
 *
 * Returns 0 immediately when the vfs is flagged VFS_RDONLY.  Returns 0
 * early as well when the mount is in forced shutdown and SYNC_CLOSE is
 * not set, or when xfs_itobp() fails in the SYNC_BDFLUSH path.  An
 * EFSCORRUPTED error ends the walk at once and is returned; otherwise
 * the last non-zero error seen is returned.
 *
 * NOTE(review): the xfs_syncsub() code visible below only calls this
 * routine when SYNC_BDFLUSH is clear -- confirm whether other callers
 * reach the SYNC_BDFLUSH paths.
 */
948159451Srodrigcint
949153323Srodrigcxfs_sync_inodes(
950153323Srodrigc	xfs_mount_t	*mp,
951153323Srodrigc	int		flags,
952153323Srodrigc	int             xflags,
953153323Srodrigc	int             *bypassed)
954153323Srodrigc{
955153323Srodrigc	xfs_inode_t	*ip = NULL;
956153323Srodrigc	xfs_inode_t	*ip_next;	/* throwaway target for IPOINTER_REMOVE */
957153323Srodrigc	xfs_buf_t	*bp;
958153323Srodrigc	xfs_vnode_t	*vp = NULL;
959153323Srodrigc	int		error;
960153323Srodrigc	int		last_error;
961153323Srodrigc	uint64_t	fflag;
962153323Srodrigc	uint		lock_flags;
963153323Srodrigc	uint		base_lock_flags;
964153323Srodrigc	boolean_t	mount_locked;
965153323Srodrigc	boolean_t	vnode_refed;
966153323Srodrigc	int		preempt;	/* bumped at the loop tail, reset by IPOINTER_INSERT */
967153323Srodrigc	xfs_dinode_t	*dip;
968153323Srodrigc	xfs_iptr_t	*ipointer;
969153323Srodrigc#ifdef DEBUG
970153323Srodrigc	boolean_t	ipointer_in = B_FALSE;
971153323Srodrigc
972153323Srodrigc#define IPOINTER_SET	ipointer_in = B_TRUE
973153323Srodrigc#define IPOINTER_CLR	ipointer_in = B_FALSE
974153323Srodrigc#else
975153323Srodrigc#define IPOINTER_SET
976153323Srodrigc#define IPOINTER_CLR
977153323Srodrigc#endif
978153323Srodrigc
979153323Srodrigc
980153323Srodrigc/* Insert a marker record into the inode list after inode ip. The list
981153323Srodrigc * must be locked when this is called. After the call the list will no
982153323Srodrigc * longer be locked.
983153323Srodrigc */
984153323Srodrigc#define IPOINTER_INSERT(ip, mp)	{ \
985153323Srodrigc		ASSERT(ipointer_in == B_FALSE); \
986153323Srodrigc		ipointer->ip_mnext = ip->i_mnext; \
987153323Srodrigc		ipointer->ip_mprev = ip; \
988153323Srodrigc		ip->i_mnext = (xfs_inode_t *)ipointer; \
989153323Srodrigc		ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \
990153323Srodrigc		preempt = 0; \
991153323Srodrigc		XFS_MOUNT_IUNLOCK(mp); \
992153323Srodrigc		mount_locked = B_FALSE; \
993153323Srodrigc		IPOINTER_SET; \
994153323Srodrigc	}
995153323Srodrigc
996153323Srodrigc/* Remove the marker from the inode list. If the marker was the only item
997153323Srodrigc * in the list then there are no remaining inodes and we should zero out
998153323Srodrigc * the whole list. If we are the current head of the list then move the head
999153323Srodrigc * past us.
1000153323Srodrigc */
1001153323Srodrigc#define IPOINTER_REMOVE(ip, mp)	{ \
1002153323Srodrigc		ASSERT(ipointer_in == B_TRUE); \
1003153323Srodrigc		if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \
1004153323Srodrigc			ip = ipointer->ip_mnext; \
1005153323Srodrigc			ip->i_mprev = ipointer->ip_mprev; \
1006153323Srodrigc			ipointer->ip_mprev->i_mnext = ip; \
1007153323Srodrigc			if (mp->m_inodes == (xfs_inode_t *)ipointer) { \
1008153323Srodrigc				mp->m_inodes = ip; \
1009153323Srodrigc			} \
1010153323Srodrigc		} else { \
1011153323Srodrigc			ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \
1012153323Srodrigc			mp->m_inodes = NULL; \
1013153323Srodrigc			ip = NULL; \
1014153323Srodrigc		} \
1015153323Srodrigc		IPOINTER_CLR; \
1016153323Srodrigc	}
1017153323Srodrigc
/* The loop tail inserts the marker (and thereby drops the mount lock)
 * whenever the bits of ++preempt selected by this mask are all zero.
 */
1018153323Srodrigc#define XFS_PREEMPT_MASK	0x7f
1019153323Srodrigc
1020153323Srodrigc	if (bypassed)
1021153323Srodrigc		*bypassed = 0;
1022153323Srodrigc	if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
1023153323Srodrigc		return 0;
1024153323Srodrigc	error = 0;
1025153323Srodrigc	last_error = 0;
1026153323Srodrigc	preempt = 0;
1027153323Srodrigc
1028153323Srodrigc	/* Allocate a reference marker */
1029153323Srodrigc	ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP);
1030153323Srodrigc
1031153323Srodrigc	fflag = XFS_B_ASYNC;		/* default is don't wait */
1032159451Srodrigc	if (flags & (SYNC_BDFLUSH | SYNC_DELWRI))
1033153323Srodrigc		fflag = XFS_B_DELWRI;
1034153323Srodrigc	if (flags & SYNC_WAIT)
1035153323Srodrigc		fflag = 0;		/* synchronous overrides all */
1036153323Srodrigc
1037153323Srodrigc	base_lock_flags = XFS_ILOCK_SHARED;
1038153323Srodrigc	if (flags & (SYNC_DELWRI | SYNC_CLOSE)) {
1039153323Srodrigc		/*
1040153323Srodrigc		 * We need the I/O lock if we're going to call any of
1041153323Srodrigc		 * the flush/inval routines.
1042153323Srodrigc		 */
1043153323Srodrigc		base_lock_flags |= XFS_IOLOCK_SHARED;
1044153323Srodrigc	}
1045153323Srodrigc
1046153323Srodrigc	XFS_MOUNT_ILOCK(mp);
1047153323Srodrigc
1048153323Srodrigc	ip = mp->m_inodes;
1049153323Srodrigc
1050153323Srodrigc	mount_locked = B_TRUE;
1051153323Srodrigc	vnode_refed  = B_FALSE;
1052153323Srodrigc
1053153323Srodrigc	IPOINTER_CLR;
1054153323Srodrigc
	/*
	 * NB: this is a do/while loop, so every `continue' below jumps
	 * to the `ip != mp->m_inodes' test at the bottom, not to the top.
	 */
1055153323Srodrigc	do {
1056153323Srodrigc		ASSERT(ipointer_in == B_FALSE);
1057153323Srodrigc		ASSERT(vnode_refed == B_FALSE);
1058153323Srodrigc
1059153323Srodrigc		lock_flags = base_lock_flags;
1060153323Srodrigc
1061153323Srodrigc		/*
1062153323Srodrigc		 * There were no inodes in the list, just break out
1063153323Srodrigc		 * of the loop.
1064153323Srodrigc		 */
1065153323Srodrigc		if (ip == NULL) {
1066153323Srodrigc			break;
1067153323Srodrigc		}
1068153323Srodrigc
1069153323Srodrigc		/*
1070153323Srodrigc		 * We found another sync thread marker - skip it
1071153323Srodrigc		 */
1072153323Srodrigc		if (ip->i_mount == NULL) {
1073153323Srodrigc			ip = ip->i_mnext;
1074153323Srodrigc			continue;
1075153323Srodrigc		}
1076153323Srodrigc
1077153323Srodrigc		vp = XFS_ITOV_NULL(ip);
1078153323Srodrigc
1079153323Srodrigc		/*
1080153323Srodrigc		 * If the vnode is gone then this is being torn down,
1081153323Srodrigc		 * call reclaim if it is flushed, else let regular flush
1082153323Srodrigc		 * code deal with it later in the loop.
1083153323Srodrigc		 */
1084153323Srodrigc
1085153323Srodrigc		if (vp == NULL) {
1086153323Srodrigc			/* Skip ones already in reclaim */
1087153323Srodrigc			if (ip->i_flags & XFS_IRECLAIM) {
1088153323Srodrigc				ip = ip->i_mnext;
1089153323Srodrigc				continue;
1090153323Srodrigc			}
1091153323Srodrigc			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
1092153323Srodrigc				ip = ip->i_mnext;
1093153323Srodrigc			} else if ((xfs_ipincount(ip) == 0) &&
1094153323Srodrigc				    xfs_iflock_nowait(ip)) {
1095153323Srodrigc				IPOINTER_INSERT(ip, mp);
1096153323Srodrigc
1097153323Srodrigc				xfs_finish_reclaim(ip, 1,
1098153323Srodrigc						XFS_IFLUSH_DELWRI_ELSE_ASYNC);
1099153323Srodrigc
1100153323Srodrigc				XFS_MOUNT_ILOCK(mp);
1101153323Srodrigc				mount_locked = B_TRUE;
1102153323Srodrigc				IPOINTER_REMOVE(ip, mp);
1103153323Srodrigc			} else {
1104153323Srodrigc				xfs_iunlock(ip, XFS_ILOCK_EXCL);
1105153323Srodrigc				ip = ip->i_mnext;
1106153323Srodrigc			}
1107153323Srodrigc			continue;
1108153323Srodrigc		}
1109153323Srodrigc
1110159451Srodrigc		if (VN_BAD(vp)) {
1111159451Srodrigc			ip = ip->i_mnext;
1112159451Srodrigc			continue;
1113159451Srodrigc		}
1114159451Srodrigc
		/*
		 * The mount lock is held and no marker is in the list at
		 * this point, so unlocking and freeing the marker is all
		 * the cleanup that is needed.
		 */
1115153323Srodrigc		if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
1116153323Srodrigc			XFS_MOUNT_IUNLOCK(mp);
1117153323Srodrigc			kmem_free(ipointer, sizeof(xfs_iptr_t));
1118153323Srodrigc			return 0;
1119153323Srodrigc		}
1120153323Srodrigc
1121153323Srodrigc		/*
1122153323Srodrigc		 * If this is just vfs_sync() or pflushd() calling
1123153323Srodrigc		 * then we can skip inodes for which it looks like
1124153323Srodrigc		 * there is nothing to do.  Since we don't have the
1125159451Srodrigc		 * inode locked this is racy, but these are periodic
1126153323Srodrigc		 * calls so it doesn't matter.  For the others we want
1127153323Srodrigc		 * to know for sure, so we at least try to lock them.
1128153323Srodrigc		 */
1129153323Srodrigc		if (flags & SYNC_BDFLUSH) {
1130153323Srodrigc			if (((ip->i_itemp == NULL) ||
1131153323Srodrigc			     !(ip->i_itemp->ili_format.ilf_fields &
1132153323Srodrigc			       XFS_ILOG_ALL)) &&
1133153323Srodrigc			    (ip->i_update_core == 0)) {
1134153323Srodrigc				ip = ip->i_mnext;
1135153323Srodrigc				continue;
1136153323Srodrigc			}
1137153323Srodrigc		}
1138153323Srodrigc
1139153323Srodrigc		/*
1140153323Srodrigc		 * Try to lock without sleeping.  We're out of order with
1141153323Srodrigc		 * the inode list lock here, so if we fail we need to drop
1142153323Srodrigc		 * the mount lock and try again.  If we're called from
1143153323Srodrigc		 * bdflush() here, then don't bother.
1144153323Srodrigc		 *
1145153323Srodrigc		 * The inode lock here actually coordinates with the
1146153323Srodrigc		 * almost spurious inode lock in xfs_ireclaim() to prevent
1147153323Srodrigc		 * the vnode we handle here without a reference from
1148153323Srodrigc		 * being freed while we reference it.  If we lock the inode
1149153323Srodrigc		 * while it's on the mount list here, then the spurious inode
1150153323Srodrigc		 * lock in xfs_ireclaim() after the inode is pulled from
1151153323Srodrigc		 * the mount list will sleep until we release it here.
1152153323Srodrigc		 * This keeps the vnode from being freed while we reference
1153159451Srodrigc		 * it.
1154153323Srodrigc		 */
1155153323Srodrigc		if (xfs_ilock_nowait(ip, lock_flags) == 0) {
			/*
			 * vp cannot be NULL here: that case was handled
			 * above and always continues, so only the
			 * SYNC_BDFLUSH half of this test can fire.
			 */
1156153323Srodrigc			if ((flags & SYNC_BDFLUSH) || (vp == NULL)) {
1157153323Srodrigc				ip = ip->i_mnext;
1158153323Srodrigc				continue;
1159153323Srodrigc			}
1160153323Srodrigc
1161159451Srodrigc			vp = vn_grab(vp);
1162153323Srodrigc			if (vp == NULL) {
1163159451Srodrigc				ip = ip->i_mnext;
1164153323Srodrigc				continue;
1165153323Srodrigc			}
1166153323Srodrigc
1167159451Srodrigc			IPOINTER_INSERT(ip, mp);
1168153323Srodrigc			xfs_ilock(ip, lock_flags);
1169153323Srodrigc
1170153323Srodrigc			ASSERT(vp == XFS_ITOV(ip));
1171153323Srodrigc			ASSERT(ip->i_mount == mp);
1172153323Srodrigc
1173153323Srodrigc			vnode_refed = B_TRUE;
1174153323Srodrigc		}
1175153323Srodrigc
1176153323Srodrigc		/* From here on in the loop we may have a marker record
1177153323Srodrigc		 * in the inode list.
1178153323Srodrigc		 */
1179153323Srodrigc
1180153323Srodrigc		if ((flags & SYNC_CLOSE)  && (vp != NULL)) {
1181153323Srodrigc			/*
1182153323Srodrigc			 * This is the shutdown case.  We just need to
1183153323Srodrigc			 * flush and invalidate all the pages associated
1184153323Srodrigc			 * with the inode.  Drop the inode lock since
1185153323Srodrigc			 * we can't hold it across calls to the buffer
1186153323Srodrigc			 * cache.
1187153323Srodrigc			 *
1188153323Srodrigc			 * We don't set the VREMAPPING bit in the vnode
1189153323Srodrigc			 * here, because we don't hold the vnode lock
1190153323Srodrigc			 * exclusively.  It doesn't really matter, though,
1191153323Srodrigc			 * because we only come here when we're shutting
1192153323Srodrigc			 * down anyway.
1193153323Srodrigc			 */
1194153323Srodrigc			xfs_iunlock(ip, XFS_ILOCK_SHARED);
1195153323Srodrigc
1196153323Srodrigc			if (XFS_FORCED_SHUTDOWN(mp)) {
1197153323Srodrigc				XVOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF);
1198153323Srodrigc			} else {
1199153323Srodrigc				XVOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_REMAPF);
1200153323Srodrigc			}
1201153323Srodrigc
1202153323Srodrigc			xfs_ilock(ip, XFS_ILOCK_SHARED);
1203153323Srodrigc
1204153323Srodrigc		} else if ((flags & SYNC_DELWRI) && (vp != NULL)) {
1205153323Srodrigc			if (VN_DIRTY(vp)) {
1206153323Srodrigc				/* We need to have dropped the lock here,
1207153323Srodrigc				 * so insert a marker if we have not already
1208153323Srodrigc				 * done so.
1209153323Srodrigc				 */
1210153323Srodrigc				if (mount_locked) {
1211153323Srodrigc					IPOINTER_INSERT(ip, mp);
1212153323Srodrigc				}
1213153323Srodrigc
1214153323Srodrigc				/*
1215153323Srodrigc				 * Drop the inode lock since we can't hold it
1216153323Srodrigc				 * across calls to the buffer cache.
1217153323Srodrigc				 */
1218153323Srodrigc				xfs_iunlock(ip, XFS_ILOCK_SHARED);
1219153323Srodrigc				XVOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1,
1220153323Srodrigc							fflag, FI_NONE, error);
1221153323Srodrigc				xfs_ilock(ip, XFS_ILOCK_SHARED);
1222153323Srodrigc			}
1223153323Srodrigc
1224153323Srodrigc		}
1225153323Srodrigc
1226153323Srodrigc		if (flags & SYNC_BDFLUSH) {
1227153323Srodrigc			if ((flags & SYNC_ATTR) &&
1228153323Srodrigc			    ((ip->i_update_core) ||
1229153323Srodrigc			     ((ip->i_itemp != NULL) &&
1230153323Srodrigc			      (ip->i_itemp->ili_format.ilf_fields != 0)))) {
1231153323Srodrigc
1232153323Srodrigc				/* Insert marker and drop lock if not already
1233153323Srodrigc				 * done.
1234153323Srodrigc				 */
1235153323Srodrigc				if (mount_locked) {
1236153323Srodrigc					IPOINTER_INSERT(ip, mp);
1237153323Srodrigc				}
1238153323Srodrigc
1239153323Srodrigc				/*
1240153323Srodrigc				 * We don't want the periodic flushing of the
1241153323Srodrigc				 * inodes by vfs_sync() to interfere with
1242153323Srodrigc				 * I/O to the file, especially read I/O
1243153323Srodrigc				 * where it is only the access time stamp
1244153323Srodrigc				 * that is being flushed out.  To prevent
1245153323Srodrigc				 * long periods where we have both inode
1246153323Srodrigc				 * locks held shared here while reading the
1247153323Srodrigc				 * inode's buffer in from disk, we drop the
1248153323Srodrigc				 * inode lock while reading in the inode
1249153323Srodrigc				 * buffer.  We have to release the buffer
1250153323Srodrigc				 * and reacquire the inode lock so that they
1251153323Srodrigc				 * are acquired in the proper order (inode
1252153323Srodrigc				 * locks first).  The buffer will go at the
1253153323Srodrigc				 * end of the lru chain, though, so we can
1254153323Srodrigc				 * expect it to still be there when we go
1255153323Srodrigc				 * for it again in xfs_iflush().
1256153323Srodrigc				 */
1257153323Srodrigc				if ((xfs_ipincount(ip) == 0) &&
1258153323Srodrigc				    xfs_iflock_nowait(ip)) {
1259153323Srodrigc
1260153323Srodrigc					xfs_ifunlock(ip);
1261153323Srodrigc					xfs_iunlock(ip, XFS_ILOCK_SHARED);
1262153323Srodrigc
1263153323Srodrigc					error = xfs_itobp(mp, NULL, ip,
1264159451Srodrigc							  &dip, &bp, 0, 0);
1265153323Srodrigc					if (!error) {
1266153323Srodrigc						xfs_buf_relse(bp);
1267153323Srodrigc					} else {
1268153323Srodrigc						/* Bailing out, remove the
1269153323Srodrigc						 * marker and free it.
1270153323Srodrigc						 */
1271153323Srodrigc						XFS_MOUNT_ILOCK(mp);
1272153323Srodrigc
1273153323Srodrigc						IPOINTER_REMOVE(ip, mp);
1274153323Srodrigc
1275153323Srodrigc						XFS_MOUNT_IUNLOCK(mp);
1276153323Srodrigc
1277153323Srodrigc						ASSERT(!(lock_flags &
1278153323Srodrigc							XFS_IOLOCK_SHARED));
1279153323Srodrigc
1280153323Srodrigc						kmem_free(ipointer,
1281153323Srodrigc							sizeof(xfs_iptr_t));
						/* The xfs_itobp() error is
						 * not passed on to the
						 * caller; the walk simply
						 * ends here.
						 */
1282153323Srodrigc						return (0);
1283153323Srodrigc					}
1284153323Srodrigc
1285153323Srodrigc					/*
1286153323Srodrigc					 * Since we dropped the inode lock,
1287153323Srodrigc					 * the inode may have been reclaimed.
1288153323Srodrigc					 * Therefore, we reacquire the mount
1289153323Srodrigc					 * lock and check to see if we were the
1290153323Srodrigc					 * inode reclaimed. If this happened
1291153323Srodrigc					 * then the ipointer marker will no
1292153323Srodrigc					 * longer point back at us. In this
1293153323Srodrigc					 * case, move ip along to the inode
1294153323Srodrigc					 * after the marker, remove the marker
1295153323Srodrigc					 * and continue.
1296153323Srodrigc					 */
1297153323Srodrigc					XFS_MOUNT_ILOCK(mp);
1298153323Srodrigc					mount_locked = B_TRUE;
1299153323Srodrigc
1300153323Srodrigc					if (ip != ipointer->ip_mprev) {
1301153323Srodrigc						IPOINTER_REMOVE(ip, mp);
1302153323Srodrigc
1303153323Srodrigc						ASSERT(!vnode_refed);
1304153323Srodrigc						ASSERT(!(lock_flags &
1305153323Srodrigc							XFS_IOLOCK_SHARED));
1306153323Srodrigc						continue;
1307153323Srodrigc					}
1308153323Srodrigc
1309153323Srodrigc					ASSERT(ip->i_mount == mp);
1310153323Srodrigc
1311153323Srodrigc					if (xfs_ilock_nowait(ip,
1312153323Srodrigc						    XFS_ILOCK_SHARED) == 0) {
1313153323Srodrigc						ASSERT(ip->i_mount == mp);
1314153323Srodrigc						/*
1315153323Srodrigc						 * We failed to reacquire
1316153323Srodrigc						 * the inode lock without
1317153323Srodrigc						 * sleeping, so just skip
1318153323Srodrigc						 * the inode for now.  We
1319153323Srodrigc						 * clear the ILOCK bit from
1320153323Srodrigc						 * the lock_flags so that we
1321153323Srodrigc						 * won't try to drop a lock
1322153323Srodrigc						 * we don't hold below.
1323153323Srodrigc						 */
1324153323Srodrigc						lock_flags &= ~XFS_ILOCK_SHARED;
						/* ip_next only absorbs the
						 * marker's successor; ip is
						 * left alone so the loop
						 * tail advances from it.
						 */
1325153323Srodrigc						IPOINTER_REMOVE(ip_next, mp);
1326153323Srodrigc					} else if ((xfs_ipincount(ip) == 0) &&
1327153323Srodrigc						   xfs_iflock_nowait(ip)) {
1328153323Srodrigc						ASSERT(ip->i_mount == mp);
1329153323Srodrigc						/*
1330153323Srodrigc						 * Since this is vfs_sync()
1331153323Srodrigc						 * calling we only flush the
1332153323Srodrigc						 * inode out if we can lock
1333153323Srodrigc						 * it without sleeping and
1334153323Srodrigc						 * it is not pinned.  Drop
1335153323Srodrigc						 * the mount lock here so
1336153323Srodrigc						 * that we don't hold it for
1337153323Srodrigc						 * too long. We already have
1338153323Srodrigc						 * a marker in the list here.
1339153323Srodrigc						 */
1340153323Srodrigc						XFS_MOUNT_IUNLOCK(mp);
1341153323Srodrigc						mount_locked = B_FALSE;
1342153323Srodrigc						error = xfs_iflush(ip,
1343153323Srodrigc							   XFS_IFLUSH_DELWRI);
1344153323Srodrigc					} else {
1345153323Srodrigc						ASSERT(ip->i_mount == mp);
1346153323Srodrigc						IPOINTER_REMOVE(ip_next, mp);
1347153323Srodrigc					}
1348153323Srodrigc				}
1349153323Srodrigc
1350153323Srodrigc			}
1351153323Srodrigc
1352153323Srodrigc		} else {
1353153323Srodrigc			if ((flags & SYNC_ATTR) &&
1354153323Srodrigc			    ((ip->i_update_core) ||
1355153323Srodrigc			     ((ip->i_itemp != NULL) &&
1356153323Srodrigc			      (ip->i_itemp->ili_format.ilf_fields != 0)))) {
1357153323Srodrigc				if (mount_locked) {
1358153323Srodrigc					IPOINTER_INSERT(ip, mp);
1359153323Srodrigc				}
1360153323Srodrigc
1361153323Srodrigc				if (flags & SYNC_WAIT) {
1362153323Srodrigc					xfs_iflock(ip);
1363153323Srodrigc					error = xfs_iflush(ip,
1364153323Srodrigc							   XFS_IFLUSH_SYNC);
1365153323Srodrigc				} else {
1366153323Srodrigc					/*
1367153323Srodrigc					 * If we can't acquire the flush
1368153323Srodrigc					 * lock, then the inode is already
1369153323Srodrigc					 * being flushed so don't bother
1370153323Srodrigc					 * waiting.  If we can lock it then
1371153323Srodrigc					 * do a delwri flush so we can
1372153323Srodrigc					 * combine multiple inode flushes
1373153323Srodrigc					 * in each disk write.
1374153323Srodrigc					 */
1375153323Srodrigc					if (xfs_iflock_nowait(ip)) {
1376153323Srodrigc						error = xfs_iflush(ip,
1377153323Srodrigc							   XFS_IFLUSH_DELWRI);
1378153323Srodrigc					}
1379153323Srodrigc					else if (bypassed)
1380153323Srodrigc						(*bypassed)++;
1381153323Srodrigc				}
1382153323Srodrigc			}
1383153323Srodrigc		}
1384153323Srodrigc
1385153323Srodrigc		if (lock_flags != 0) {
1386153323Srodrigc			xfs_iunlock(ip, lock_flags);
1387153323Srodrigc		}
1388153323Srodrigc
1389153323Srodrigc		if (vnode_refed) {
1390153323Srodrigc			/*
1391153323Srodrigc			 * If we had to take a reference on the vnode
1392153323Srodrigc			 * above, then wait until after we've unlocked
1393153323Srodrigc			 * the inode to release the reference.  This is
1394153323Srodrigc			 * because we can be already holding the inode
1395153323Srodrigc			 * lock when VN_RELE() calls xfs_inactive().
1396153323Srodrigc			 *
1397153323Srodrigc			 * Make sure to drop the mount lock before calling
1398153323Srodrigc			 * VN_RELE() so that we don't trip over ourselves if
1399153323Srodrigc			 * we have to go for the mount lock again in the
1400153323Srodrigc			 * inactive code.
1401153323Srodrigc			 */
1402153323Srodrigc			if (mount_locked) {
1403153323Srodrigc				IPOINTER_INSERT(ip, mp);
1404153323Srodrigc			}
1405153323Srodrigc
1406153323Srodrigc			VN_RELE(vp);
1407153323Srodrigc
1408153323Srodrigc			vnode_refed = B_FALSE;
1409153323Srodrigc		}
1410153323Srodrigc
		/*
		 * error is not cleared between iterations, so it may
		 * still hold the value set for an earlier inode.
		 */
1411153323Srodrigc		if (error) {
1412153323Srodrigc			last_error = error;
1413153323Srodrigc		}
1414153323Srodrigc
1415153323Srodrigc		/*
1416153323Srodrigc		 * bail out if the filesystem is corrupted.
1417153323Srodrigc		 */
1418153323Srodrigc		if (error == EFSCORRUPTED)  {
1419153323Srodrigc			if (!mount_locked) {
1420153323Srodrigc				XFS_MOUNT_ILOCK(mp);
1421153323Srodrigc				IPOINTER_REMOVE(ip, mp);
1422153323Srodrigc			}
1423153323Srodrigc			XFS_MOUNT_IUNLOCK(mp);
1424153323Srodrigc			ASSERT(ipointer_in == B_FALSE);
1425153323Srodrigc			kmem_free(ipointer, sizeof(xfs_iptr_t));
1426153323Srodrigc			return XFS_ERROR(error);
1427153323Srodrigc		}
1428153323Srodrigc
1429153323Srodrigc		/* Let other threads have a chance at the mount lock
1430153323Srodrigc		 * if we have looped many times without dropping the
1431153323Srodrigc		 * lock.
1432153323Srodrigc		 */
1433153323Srodrigc		if ((++preempt & XFS_PREEMPT_MASK) == 0) {
1434153323Srodrigc			if (mount_locked) {
1435153323Srodrigc				IPOINTER_INSERT(ip, mp);
1436153323Srodrigc			}
1437153323Srodrigc		}
1438153323Srodrigc
		/*
		 * The mount lock was dropped at some point, which means
		 * the marker is in the list: retake the lock and resume
		 * from the inode following the marker.
		 */
1439153323Srodrigc		if (mount_locked == B_FALSE) {
1440153323Srodrigc			XFS_MOUNT_ILOCK(mp);
1441153323Srodrigc			mount_locked = B_TRUE;
1442153323Srodrigc			IPOINTER_REMOVE(ip, mp);
1443153323Srodrigc			continue;
1444153323Srodrigc		}
1445153323Srodrigc
1446153323Srodrigc		ASSERT(ipointer_in == B_FALSE);
1447153323Srodrigc		ip = ip->i_mnext;
1448153323Srodrigc
1449153323Srodrigc	} while (ip != mp->m_inodes);
1450153323Srodrigc
1451153323Srodrigc	XFS_MOUNT_IUNLOCK(mp);
1452153323Srodrigc
1453153323Srodrigc	ASSERT(ipointer_in == B_FALSE);
1454153323Srodrigc
1455153323Srodrigc	kmem_free(ipointer, sizeof(xfs_iptr_t));
1456153323Srodrigc	return XFS_ERROR(last_error);
1457153323Srodrigc}
1458153323Srodrigc
1459153323Srodrigc/*
1460153323Srodrigc * xfs sync routine for internal use
1461153323Srodrigc *
1462153323Srodrigc * This routine supports all of the flags defined for the generic VFS_SYNC
1463153323Srodrigc * interface as explained above under xfs_sync.  In the interests of not
1464159451Srodrigc * changing interfaces within the 6.5 family, additional internally-
1465153323Srodrigc * required functions are specified within a separate xflags parameter,
1466153323Srodrigc * only available by calling this routine.
1467153323Srodrigc *
1468153323Srodrigc */
1469153323Srodrigcint
1470153323Srodrigcxfs_syncsub(
1471153323Srodrigc	xfs_mount_t	*mp,
1472153323Srodrigc	int		flags,
1473153323Srodrigc	int             xflags,
1474153323Srodrigc	int             *bypassed)
1475153323Srodrigc{
1476153323Srodrigc	int		error = 0;
1477153323Srodrigc	int		last_error = 0;
1478153323Srodrigc	uint		log_flags = XFS_LOG_FORCE;
1479153323Srodrigc	xfs_buf_t	*bp;
1480153323Srodrigc	xfs_buf_log_item_t	*bip;
1481153323Srodrigc
1482153323Srodrigc	/*
1483153323Srodrigc	 * Sync out the log.  This ensures that the log is periodically
1484153323Srodrigc	 * flushed even if there is not enough activity to fill it up.
1485153323Srodrigc	 */
1486153323Srodrigc	if (flags & SYNC_WAIT)
1487153323Srodrigc		log_flags |= XFS_LOG_SYNC;
1488153323Srodrigc
1489153323Srodrigc	xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
1490153323Srodrigc
1491153323Srodrigc	if (flags & (SYNC_ATTR|SYNC_DELWRI)) {
1492153323Srodrigc		if (flags & SYNC_BDFLUSH)
1493153323Srodrigc			xfs_finish_reclaim_all(mp, 1);
1494153323Srodrigc		else
1495153323Srodrigc			error = xfs_sync_inodes(mp, flags, xflags, bypassed);
1496153323Srodrigc	}
1497153323Srodrigc
1498153323Srodrigc	/*
1499153323Srodrigc	 * Flushing out dirty data above probably generated more
1500153323Srodrigc	 * log activity, so if this isn't vfs_sync() then flush
1501153323Srodrigc	 * the log again.
1502153323Srodrigc	 */
1503153323Srodrigc	if (flags & SYNC_DELWRI) {
1504153323Srodrigc		xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
1505153323Srodrigc	}
1506153323Srodrigc
1507153323Srodrigc	if (flags & SYNC_FSDATA) {
1508153323Srodrigc		/*
1509153323Srodrigc		 * If this is vfs_sync() then only sync the superblock
1510153323Srodrigc		 * if we can lock it without sleeping and it is not pinned.
1511153323Srodrigc		 */
1512153323Srodrigc		if (flags & SYNC_BDFLUSH) {
1513153323Srodrigc			bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
1514153323Srodrigc			if (bp != NULL) {
1515153323Srodrigc				bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
1516153323Srodrigc				if ((bip != NULL) &&
1517153323Srodrigc				    xfs_buf_item_dirty(bip)) {
1518153323Srodrigc					if (!(XFS_BUF_ISPINNED(bp))) {
1519153323Srodrigc						XFS_BUF_ASYNC(bp);
1520153323Srodrigc						error = xfs_bwrite(mp, bp);
1521153323Srodrigc					} else {
1522153323Srodrigc						xfs_buf_relse(bp);
1523153323Srodrigc					}
1524153323Srodrigc				} else {
1525153323Srodrigc					xfs_buf_relse(bp);
1526153323Srodrigc				}
1527153323Srodrigc			}
1528153323Srodrigc		} else {
1529153323Srodrigc			bp = xfs_getsb(mp, 0);
1530153323Srodrigc			/*
1531153323Srodrigc			 * If the buffer is pinned then push on the log so
1532153323Srodrigc			 * we won't get stuck waiting in the write for
1533153323Srodrigc			 * someone, maybe ourselves, to flush the log.
1534153323Srodrigc			 * Even though we just pushed the log above, we
1535153323Srodrigc			 * did not have the superblock buffer locked at
1536153323Srodrigc			 * that point so it can become pinned in between
1537153323Srodrigc			 * there and here.
1538153323Srodrigc			 */
1539153323Srodrigc			if (XFS_BUF_ISPINNED(bp))
1540153323Srodrigc				xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
1541159451Srodrigc			if (flags & SYNC_WAIT)
1542159451Srodrigc				XFS_BUF_UNASYNC(bp);
1543159451Srodrigc			else
1544159451Srodrigc				XFS_BUF_ASYNC(bp);
1545153323Srodrigc			error = xfs_bwrite(mp, bp);
1546153323Srodrigc		}
1547153323Srodrigc		if (error) {
1548153323Srodrigc			last_error = error;
1549153323Srodrigc		}
1550153323Srodrigc	}
1551153323Srodrigc
1552153323Srodrigc	/*
1553153323Srodrigc	 * If this is the periodic sync, then kick some entries out of
1554153323Srodrigc	 * the reference cache.  This ensures that idle entries are
1555153323Srodrigc	 * eventually kicked out of the cache.
1556153323Srodrigc	 */
1557153323Srodrigc	if (flags & SYNC_REFCACHE) {
1558159451Srodrigc		if (flags & SYNC_WAIT)
1559159451Srodrigc			xfs_refcache_purge_mp(mp);
1560159451Srodrigc		else
1561159451Srodrigc			xfs_refcache_purge_some(mp);
1562153323Srodrigc	}
1563153323Srodrigc
1564153323Srodrigc	/*
1565153323Srodrigc	 * Now check to see if the log needs a "dummy" transaction.
1566153323Srodrigc	 */
1567153323Srodrigc
1568153323Srodrigc	if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) {
1569153323Srodrigc		xfs_trans_t *tp;
1570153323Srodrigc		xfs_inode_t *ip;
1571153323Srodrigc
1572153323Srodrigc		/*
1573153323Srodrigc		 * Put a dummy transaction in the log to tell
1574153323Srodrigc		 * recovery that all others are OK.
1575153323Srodrigc		 */
1576153323Srodrigc		tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
1577153323Srodrigc		if ((error = xfs_trans_reserve(tp, 0,
1578153323Srodrigc				XFS_ICHANGE_LOG_RES(mp),
1579153323Srodrigc				0, 0, 0)))  {
1580153323Srodrigc			xfs_trans_cancel(tp, 0);
1581153323Srodrigc			return error;
1582153323Srodrigc		}
1583153323Srodrigc
1584153323Srodrigc		ip = mp->m_rootip;
1585153323Srodrigc		xfs_ilock(ip, XFS_ILOCK_EXCL);
1586153323Srodrigc
1587153323Srodrigc		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1588153323Srodrigc		xfs_trans_ihold(tp, ip);
1589153323Srodrigc		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1590153323Srodrigc		error = xfs_trans_commit(tp, 0, NULL);
1591153323Srodrigc		xfs_iunlock(ip, XFS_ILOCK_EXCL);
1592153323Srodrigc		xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
1593153323Srodrigc	}
1594153323Srodrigc
1595153323Srodrigc	/*
1596153323Srodrigc	 * When shutting down, we need to insure that the AIL is pushed
1597153323Srodrigc	 * to disk or the filesystem can appear corrupt from the PROM.
1598153323Srodrigc	 */
1599153323Srodrigc	if ((flags & (SYNC_CLOSE|SYNC_WAIT)) == (SYNC_CLOSE|SYNC_WAIT)) {
1600153323Srodrigc		XFS_bflush(mp->m_ddev_targp);
1601153323Srodrigc		if (mp->m_rtdev_targp) {
1602153323Srodrigc			XFS_bflush(mp->m_rtdev_targp);
1603153323Srodrigc		}
1604153323Srodrigc	}
1605153323Srodrigc
1606153323Srodrigc	return XFS_ERROR(last_error);
1607153323Srodrigc}
1608153323Srodrigc
1609153323Srodrigc/*
1610159451Srodrigc * xfs_vget - called by DMAPI and NFSD to get vnode from file handle
1611153323Srodrigc */
1612153323SrodrigcSTATIC int
1613153323Srodrigcxfs_vget(
1614153323Srodrigc	bhv_desc_t	*bdp,
1615153323Srodrigc	xfs_vnode_t	**vpp,
1616153323Srodrigc	fid_t		*fidp)
1617153323Srodrigc{
1618159451Srodrigc	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
1619159451Srodrigc	xfs_fid_t	*xfid = (struct xfs_fid *)fidp;
1620153323Srodrigc	xfs_inode_t	*ip;
1621153323Srodrigc	int		error;
1622153323Srodrigc	xfs_ino_t	ino;
1623153323Srodrigc	unsigned int	igen;
1624153323Srodrigc
1625159451Srodrigc	/*
1626159451Srodrigc	 * Invalid.  Since handles can be created in user space and passed in
1627159451Srodrigc	 * via gethandle(), this is not cause for a panic.
1628159451Srodrigc	 */
1629159451Srodrigc	if (xfid->xfs_fid_len != sizeof(*xfid) - sizeof(xfid->xfs_fid_len))
1630153323Srodrigc		return XFS_ERROR(EINVAL);
1631159451Srodrigc
1632159451Srodrigc	ino  = xfid->xfs_fid_ino;
1633159451Srodrigc	igen = xfid->xfs_fid_gen;
1634159451Srodrigc
1635159451Srodrigc	/*
1636159451Srodrigc	 * NFS can sometimes send requests for ino 0.  Fail them gracefully.
1637159451Srodrigc	 */
1638159451Srodrigc	if (ino == 0)
1639159451Srodrigc		return XFS_ERROR(ESTALE);
1640159451Srodrigc
1641159451Srodrigc	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
1642153323Srodrigc	if (error) {
1643153323Srodrigc		*vpp = NULL;
1644153323Srodrigc		return error;
1645153323Srodrigc	}
1646159451Srodrigc
1647153323Srodrigc	if (ip == NULL) {
1648153323Srodrigc		*vpp = NULL;
1649153323Srodrigc		return XFS_ERROR(EIO);
1650153323Srodrigc	}
1651153323Srodrigc
1652159451Srodrigc	if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) {
1653153323Srodrigc		xfs_iput_new(ip, XFS_ILOCK_SHARED);
1654153323Srodrigc		*vpp = NULL;
1655153323Srodrigc		return XFS_ERROR(ENOENT);
1656153323Srodrigc	}
1657153323Srodrigc
1658153323Srodrigc	*vpp = XFS_ITOV(ip);
1659153323Srodrigc	xfs_iunlock(ip, XFS_ILOCK_SHARED);
1660153323Srodrigc	return 0;
1661153323Srodrigc}
1662153323Srodrigc
1663153323Srodrigc
/*
 * Names of the mount options understood by xfs_parseargs(); several of
 * them are also emitted again by xfs_showargs().
 */
#define MNTOPT_LOGBUFS		"logbufs"	/* number of XFS log buffers */
#define MNTOPT_LOGBSIZE		"logbsize"	/* size of XFS log buffers */
#define MNTOPT_LOGDEV		"logdev"	/* log device */
#define MNTOPT_RTDEV		"rtdev"		/* realtime I/O device */
#define MNTOPT_BIOSIZE		"biosize"	/* log2 of preferred buffered io size */
#define MNTOPT_WSYNC		"wsync"		/* safe-mode nfs compatible mount */
#define MNTOPT_INO64		"ino64"		/* force inodes into 64-bit range */
#define MNTOPT_NOALIGN		"noalign"	/* turn off stripe alignment */
#define MNTOPT_SWALLOC		"swalloc"	/* turn on stripe width allocation */
#define MNTOPT_SUNIT		"sunit"		/* data volume stripe unit */
#define MNTOPT_SWIDTH		"swidth"	/* data volume stripe width */
#define MNTOPT_NOUUID		"nouuid"	/* ignore filesystem UUID */
#define MNTOPT_MTPT		"mtpt"		/* filesystem mount point */
#define MNTOPT_GRPID		"grpid"		/* group-ID from parent directory */
#define MNTOPT_NOGRPID		"nogrpid"	/* group-ID from current process */
#define MNTOPT_BSDGROUPS	"bsdgroups"    /* group-ID from parent directory */
#define MNTOPT_SYSVGROUPS	"sysvgroups"   /* group-ID from current process */
#define MNTOPT_ALLOCSIZE	"allocsize"    /* preferred allocation size */
#define MNTOPT_IHASHSIZE	"ihashsize"    /* size of inode hash table */
#define MNTOPT_NORECOVERY	"norecovery"   /* don't run XFS recovery */
#define MNTOPT_BARRIER		"barrier"	/* use write barriers for log write and
					 * unwritten extent conversion */
#define MNTOPT_NOBARRIER	"nobarrier"	/* .. disable */
#define MNTOPT_OSYNCISOSYNC	"osyncisosync" /* o_sync is REALLY o_sync */
#define MNTOPT_64BITINODE	"inode64"	/* inodes can be allocated anywhere */
#define MNTOPT_IKEEP		"ikeep"		/* do not free empty inode clusters */
#define MNTOPT_NOIKEEP		"noikeep"	/* free empty inode clusters */
#define MNTOPT_LARGEIO		"largeio"	/* report large I/O sizes in stat() */
#define MNTOPT_NOLARGEIO	"nolargeio"	/* do not report large I/O sizes
					 * in stat(). */
#define MNTOPT_ATTR2		"attr2"		/* do use attr2 attribute format */
#define MNTOPT_NOATTR2		"noattr2"	/* do not use attr2 attribute format */
/* Alias: simple_strtoul() calls in this file resolve to strtoul(). */
#define simple_strtoul		strtoul
1697153323Srodrigc
/*
 * Convert a number that may carry one trailing binary size suffix.
 *
 * A final 'K'/'k', 'M'/'m' or 'G'/'g' scales the converted value by
 * 2^10, 2^20 or 2^30 respectively.  The suffix character is overwritten
 * with a NUL in the caller's buffer before conversion, so cp must point
 * to writable storage.
 *
 * cp   - NUL-terminated string to convert (modified in place, see above)
 * endp - handed to simple_strtoul(); receives the end of the digits
 * base - numeric base handed to simple_strtoul()
 *
 * Returns the converted value shifted left by the suffix factor.  An
 * empty string is converted as-is (no suffix lookup), which avoids
 * reading the byte in front of the buffer.
 */
STATIC unsigned long
suffix_strtoul(char *cp, char **endp, unsigned int base)
{
	size_t	len = strlen(cp);
	int	shift_left_factor = 0;

	/* Only inspect the last character when there is one. */
	if (len > 0) {
		switch (cp[len - 1]) {
		case 'K':
		case 'k':
			shift_left_factor = 10;
			break;
		case 'M':
		case 'm':
			shift_left_factor = 20;
			break;
		case 'G':
		case 'g':
			shift_left_factor = 30;
			break;
		default:
			break;
		}

		/* Strip the suffix so the number parser only sees digits. */
		if (shift_left_factor != 0)
			cp[len - 1] = '\0';
	}

	return simple_strtoul(cp, endp, base) << shift_left_factor;
}
1720159451Srodrigc
1721159451Srodrigc
1722153323SrodrigcSTATIC int
1723153323Srodrigcxfs_parseargs(
1724153323Srodrigc	struct bhv_desc		*bhv,
1725153323Srodrigc	char			*options,
1726153323Srodrigc	struct xfs_mount_args	*args,
1727153323Srodrigc	int			update)
1728153323Srodrigc{
1729153323Srodrigc	struct xfs_vfs		*vfsp = bhvtovfs(bhv);
1730153323Srodrigc	char			*this_char, *value, *eov;
1731153323Srodrigc	int			dsunit, dswidth, vol_dsunit, vol_dswidth;
1732153323Srodrigc	int			iosize;
1733153323Srodrigc
1734159451Srodrigc	args->flags |= XFSMNT_IDELETE;
1735159451Srodrigc	args->flags |= XFSMNT_BARRIER;
1736159451Srodrigc	args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
1737153323Srodrigc
1738153323Srodrigc	if (!options)
1739159451Srodrigc		goto done;
1740153323Srodrigc
1741153323Srodrigc	iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
1742153323Srodrigc
1743153323Srodrigc	while ((this_char = strsep(&options, ",")) != NULL) {
1744153323Srodrigc		if (!*this_char)
1745153323Srodrigc			continue;
1746153323Srodrigc
1747153323Srodrigc		if ((value = index(this_char, '=')) != NULL)
1748153323Srodrigc			*value++ = 0;
1749153323Srodrigc
1750153323Srodrigc		if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
1751153323Srodrigc			if (!value || !*value) {
1752153323Srodrigc				printf("XFS: %s option requires an argument\n",
1753159451Srodrigc					this_char);
1754153323Srodrigc				return EINVAL;
1755153323Srodrigc			}
1756153323Srodrigc			args->logbufs = simple_strtoul(value, &eov, 10);
1757153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
1758153323Srodrigc			if (!value || !*value) {
1759153323Srodrigc				printf("XFS: %s option requires an argument\n",
1760159451Srodrigc					this_char);
1761153323Srodrigc				return EINVAL;
1762153323Srodrigc			}
1763159451Srodrigc			args->logbufsize = suffix_strtoul(value, &eov, 10);
1764153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
1765153323Srodrigc			if (!value || !*value) {
1766153323Srodrigc				printf("XFS: %s option requires an argument\n",
1767159451Srodrigc					this_char);
1768153323Srodrigc				return EINVAL;
1769153323Srodrigc			}
1770153323Srodrigc			strncpy(args->logname, value, MAXNAMELEN);
1771153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_MTPT)) {
1772153323Srodrigc			if (!value || !*value) {
1773153323Srodrigc				printf("XFS: %s option requires an argument\n",
1774159451Srodrigc					this_char);
1775153323Srodrigc				return EINVAL;
1776153323Srodrigc			}
1777153323Srodrigc			strncpy(args->mtpt, value, MAXNAMELEN);
1778153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
1779153323Srodrigc			if (!value || !*value) {
1780153323Srodrigc				printf("XFS: %s option requires an argument\n",
1781159451Srodrigc					this_char);
1782153323Srodrigc				return EINVAL;
1783153323Srodrigc			}
1784153323Srodrigc			strncpy(args->rtname, value, MAXNAMELEN);
1785153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
1786153323Srodrigc			if (!value || !*value) {
1787153323Srodrigc				printf("XFS: %s option requires an argument\n",
1788159451Srodrigc					this_char);
1789153323Srodrigc				return EINVAL;
1790153323Srodrigc			}
1791153323Srodrigc			iosize = simple_strtoul(value, &eov, 10);
1792153323Srodrigc			args->flags |= XFSMNT_IOSIZE;
1793153323Srodrigc			args->iosizelog = (uint8_t) iosize;
1794159451Srodrigc		} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
1795159451Srodrigc			if (!value || !*value) {
1796159451Srodrigc				printk("XFS: %s option requires an argument\n",
1797159451Srodrigc					this_char);
1798159451Srodrigc				return EINVAL;
1799159451Srodrigc			}
1800159451Srodrigc			iosize = suffix_strtoul(value, &eov, 10);
1801159451Srodrigc			args->flags |= XFSMNT_IOSIZE;
1802159451Srodrigc			args->iosizelog = ffs(iosize) - 1;
1803159451Srodrigc		} else if (!strcmp(this_char, MNTOPT_IHASHSIZE)) {
1804159451Srodrigc			if (!value || !*value) {
1805159451Srodrigc				printk("XFS: %s option requires an argument\n",
1806159451Srodrigc					this_char);
1807159451Srodrigc				return EINVAL;
1808159451Srodrigc			}
1809159451Srodrigc			args->flags |= XFSMNT_IHASHSIZE;
1810159451Srodrigc			args->ihashsize = simple_strtoul(value, &eov, 10);
1811159451Srodrigc		} else if (!strcmp(this_char, MNTOPT_GRPID) ||
1812159451Srodrigc			   !strcmp(this_char, MNTOPT_BSDGROUPS)) {
1813159451Srodrigc			vfsp->vfs_flag |= VFS_GRPID;
1814159451Srodrigc		} else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
1815159451Srodrigc			   !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
1816159451Srodrigc			vfsp->vfs_flag &= ~VFS_GRPID;
1817153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
1818153323Srodrigc			args->flags |= XFSMNT_WSYNC;
1819153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
1820153323Srodrigc			args->flags |= XFSMNT_OSYNCISOSYNC;
1821153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
1822153323Srodrigc			args->flags |= XFSMNT_NORECOVERY;
1823153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_INO64)) {
1824153323Srodrigc			args->flags |= XFSMNT_INO64;
1825153323Srodrigc#if !XFS_BIG_INUMS
1826153323Srodrigc
1827153323Srodrigc			printf("XFS: %s option not allowed on this system\n",
1828159451Srodrigc				this_char);
1829153323Srodrigc			return EINVAL;
1830153323Srodrigc#endif
1831153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
1832153323Srodrigc			args->flags |= XFSMNT_NOALIGN;
1833159451Srodrigc		} else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
1834159451Srodrigc			args->flags |= XFSMNT_SWALLOC;
1835153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_SUNIT)) {
1836153323Srodrigc			if (!value || !*value) {
1837153323Srodrigc				printf("XFS: %s option requires an argument\n",
1838159451Srodrigc					this_char);
1839153323Srodrigc				return EINVAL;
1840153323Srodrigc			}
1841153323Srodrigc			dsunit = simple_strtoul(value, &eov, 10);
1842153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
1843153323Srodrigc			if (!value || !*value) {
1844153323Srodrigc				printf("XFS: %s option requires an argument\n",
1845159451Srodrigc					this_char);
1846153323Srodrigc				return EINVAL;
1847153323Srodrigc			}
1848153323Srodrigc			dswidth = simple_strtoul(value, &eov, 10);
1849153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
1850153323Srodrigc			args->flags &= ~XFSMNT_32BITINODES;
1851153323Srodrigc#if !XFS_BIG_INUMS
1852153323Srodrigc
1853153323Srodrigc			printf("XFS: %s option not allowed on this system\n",
1854159451Srodrigc				this_char);
1855153323Srodrigc			return EINVAL;
1856153323Srodrigc#endif
1857153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
1858153323Srodrigc			args->flags |= XFSMNT_NOUUID;
1859159451Srodrigc		} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
1860159451Srodrigc			args->flags |= XFSMNT_BARRIER;
1861159451Srodrigc		} else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
1862159451Srodrigc			args->flags &= ~XFSMNT_BARRIER;
1863153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
1864153323Srodrigc			args->flags &= ~XFSMNT_IDELETE;
1865153323Srodrigc		} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
1866153323Srodrigc			args->flags |= XFSMNT_IDELETE;
1867159451Srodrigc		} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
1868159451Srodrigc			args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE;
1869159451Srodrigc		} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
1870159451Srodrigc			args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
1871159451Srodrigc		} else if (!strcmp(this_char, MNTOPT_ATTR2)) {
1872159451Srodrigc			args->flags |= XFSMNT_ATTR2;
1873159451Srodrigc		} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
1874159451Srodrigc			args->flags &= ~XFSMNT_ATTR2;
1875153323Srodrigc		} else if (!strcmp(this_char, "osyncisdsync")) {
1876153323Srodrigc			/* no-op, this is now the default */
1877153323Srodrigcprintf("XFS: osyncisdsync is now the default, option is deprecated.\n");
1878153323Srodrigc		} else if (!strcmp(this_char, "irixsgid")) {
1879153323Srodrigcprintf("XFS: irixsgid is now a sysctl(2) variable, option is deprecated.\n");
1880153323Srodrigc		} else {
1881153323Srodrigc			printf("XFS: unknown mount option [%s].\n", this_char);
1882153323Srodrigc			return EINVAL;
1883153323Srodrigc		}
1884153323Srodrigc	}
1885153323Srodrigc
1886153323Srodrigc	if (args->flags & XFSMNT_NORECOVERY) {
1887153323Srodrigc		if ((vfsp->vfs_flag & VFS_RDONLY) == 0) {
1888153323Srodrigc			printf("XFS: no-recovery mounts must be read-only.\n");
1889153323Srodrigc			return EINVAL;
1890153323Srodrigc		}
1891153323Srodrigc	}
1892153323Srodrigc
1893153323Srodrigc	if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) {
1894153323Srodrigc		printf(
1895153323Srodrigc	"XFS: sunit and swidth options incompatible with the noalign option\n");
1896153323Srodrigc		return EINVAL;
1897153323Srodrigc	}
1898153323Srodrigc
1899153323Srodrigc	if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
1900153323Srodrigc		printf("XFS: sunit and swidth must be specified together\n");
1901153323Srodrigc		return EINVAL;
1902153323Srodrigc	}
1903153323Srodrigc
1904153323Srodrigc	if (dsunit && (dswidth % dsunit != 0)) {
1905153323Srodrigc		printf(
1906153323Srodrigc	"XFS: stripe width (%d) must be a multiple of the stripe unit (%d)\n",
1907153323Srodrigc			dswidth, dsunit);
1908153323Srodrigc		return EINVAL;
1909153323Srodrigc	}
1910153323Srodrigc
1911153323Srodrigc	if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
1912153323Srodrigc		if (dsunit) {
1913153323Srodrigc			args->sunit = dsunit;
1914153323Srodrigc			args->flags |= XFSMNT_RETERR;
1915153323Srodrigc		} else {
1916153323Srodrigc			args->sunit = vol_dsunit;
1917153323Srodrigc		}
1918153323Srodrigc		dswidth ? (args->swidth = dswidth) :
1919153323Srodrigc			  (args->swidth = vol_dswidth);
1920153323Srodrigc	} else {
1921153323Srodrigc		args->sunit = args->swidth = 0;
1922153323Srodrigc	}
1923153323Srodrigc
1924159451Srodrigcdone:
1925159451Srodrigc	if (args->flags & XFSMNT_32BITINODES)
1926159451Srodrigc		vfsp->vfs_flag |= VFS_32BITINODES;
1927159451Srodrigc	if (args->flags2)
1928159451Srodrigc		args->flags |= XFSMNT_FLAGS2;
1929153323Srodrigc	return 0;
1930153323Srodrigc}
1931153323Srodrigc
1932159451Srodrigc#define seq_printf sbuf_printf
1933153323SrodrigcSTATIC int
1934153323Srodrigcxfs_showargs(
1935153323Srodrigc	struct bhv_desc		*bhv,
1936153323Srodrigc	struct sbuf		*m)
1937153323Srodrigc{
1938153323Srodrigc	static struct proc_xfs_info {
1939153323Srodrigc		int	flag;
1940153323Srodrigc		char	*str;
1941153323Srodrigc	} xfs_info[] = {
1942153323Srodrigc		/* the few simple ones we can get from the mount struct */
1943153323Srodrigc		{ XFS_MOUNT_WSYNC,		"," MNTOPT_WSYNC },
1944153323Srodrigc		{ XFS_MOUNT_INO64,		"," MNTOPT_INO64 },
1945153323Srodrigc		{ XFS_MOUNT_NOALIGN,		"," MNTOPT_NOALIGN },
1946159451Srodrigc		{ XFS_MOUNT_SWALLOC,		"," MNTOPT_SWALLOC },
1947153323Srodrigc		{ XFS_MOUNT_NOUUID,		"," MNTOPT_NOUUID },
1948153323Srodrigc		{ XFS_MOUNT_NORECOVERY,		"," MNTOPT_NORECOVERY },
1949153323Srodrigc		{ XFS_MOUNT_OSYNCISOSYNC,	"," MNTOPT_OSYNCISOSYNC },
1950153323Srodrigc		{ 0, NULL }
1951153323Srodrigc	};
1952153323Srodrigc	struct proc_xfs_info	*xfs_infop;
1953153323Srodrigc	struct xfs_mount	*mp = XFS_BHVTOM(bhv);
1954159451Srodrigc	struct xfs_vfs		*vfsp = XFS_MTOVFS(mp);
1955153323Srodrigc
1956153323Srodrigc	for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) {
1957153323Srodrigc		if (mp->m_flags & xfs_infop->flag)
1958240207Sdim			sbuf_printf(m, "%s", xfs_infop->str);
1959153323Srodrigc	}
1960153323Srodrigc
1961159451Srodrigc	if (mp->m_flags & XFS_MOUNT_IHASHSIZE)
1962159451Srodrigc		seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", mp->m_ihsize);
1963159451Srodrigc
1964153323Srodrigc	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
1965159451Srodrigc		seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
1966159451Srodrigc				(int)(1 << mp->m_writeio_log) >> 10);
1967153323Srodrigc
1968153323Srodrigc	if (mp->m_logbufs > 0)
1969153323Srodrigc		sbuf_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
1970153323Srodrigc	if (mp->m_logbsize > 0)
1971159451Srodrigc		seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
1972153323Srodrigc
1973159451Srodrigc	if (mp->m_logname)
1974159451Srodrigc		seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
1975159451Srodrigc	if (mp->m_rtname)
1976159451Srodrigc		seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
1977153323Srodrigc
1978153323Srodrigc	if (mp->m_dalign > 0)
1979153323Srodrigc		sbuf_printf(m, "," MNTOPT_SUNIT "=%d",
1980153323Srodrigc				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
1981153323Srodrigc	if (mp->m_swidth > 0)
1982153323Srodrigc		sbuf_printf(m, "," MNTOPT_SWIDTH "=%d",
1983153323Srodrigc				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
1984153323Srodrigc
1985159451Srodrigc	if (!(mp->m_flags & XFS_MOUNT_IDELETE))
1986159451Srodrigc		seq_printf(m, "," MNTOPT_IKEEP);
1987159451Srodrigc	if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE))
1988159451Srodrigc		seq_printf(m, "," MNTOPT_LARGEIO);
1989159451Srodrigc
1990159451Srodrigc	if (!(vfsp->vfs_flag & VFS_32BITINODES))
1991153323Srodrigc		sbuf_printf(m, "," MNTOPT_64BITINODE);
1992159451Srodrigc	if (vfsp->vfs_flag & VFS_GRPID)
1993159451Srodrigc		seq_printf(m, "," MNTOPT_GRPID);
1994159451Srodrigc
1995153323Srodrigc	return 0;
1996153323Srodrigc}
1997153323Srodrigc
1998159451SrodrigcSTATIC void
1999159451Srodrigcxfs_freeze(
2000159451Srodrigc	bhv_desc_t	*bdp)
2001159451Srodrigc{
2002159451Srodrigc	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
2003153323Srodrigc
2004159451Srodrigc	while (atomic_read(&mp->m_active_trans) > 0)
2005159451Srodrigc		delay(100);
2006159451Srodrigc
2007159451Srodrigc	/* Push the superblock and write an unmount record */
2008159451Srodrigc	xfs_log_unmount_write(mp);
2009159451Srodrigc	xfs_unmountfs_writesb(mp);
2010159451Srodrigc	xfs_fs_log_dummy(mp);
2011159451Srodrigc}
2012159451Srodrigc
2013159451Srodrigc
/*
 * VFS operations vector for XFS: maps each xvfsops_t slot to the XFS
 * routine that implements it.
 */
xvfsops_t xfs_vfsops = {
	BHV_IDENTITY_INIT(VFS_BHV_XFS,VFS_POSITION_XFS),
	.xvfs_parseargs		= xfs_parseargs,
	.xvfs_showargs		= xfs_showargs,
	.xvfs_mount		= xfs_mount,
	.xvfs_unmount		= xfs_unmount,
	.xvfs_mntupdate		= xfs_mntupdate,
	.xvfs_root		= xfs_root,
	.xvfs_statvfs		= xfs_statvfs,
	.xvfs_sync		= xfs_sync,
	.xvfs_vget		= xfs_vget,
	/*
	 * The next three slots all point at fs_nosys, cast to the slot's
	 * type.  NOTE(review): presumably a "not supported" stub - confirm
	 * against its definition.
	 */
	.xvfs_dmapiops		= (xvfs_dmapiops_t)fs_nosys,
	.xvfs_quotactl		= (xvfs_quotactl_t)fs_nosys,
	.xvfs_get_inode		= (xvfs_get_inode_t)fs_nosys,
	.xvfs_init_vnode	= xfs_initialize_vnode,
	.xvfs_force_shutdown	= xfs_do_force_shutdown,
	.xvfs_freeze		= xfs_freeze,
};
2032