/*-
 * Copyright (c) 1989, 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 164033 2006-11-06 13:42:10Z rwatson $");

#include "opt_mac.h"
#include "opt_quota.h"
#include "opt_ufs.h"
#include "opt_ffs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mutex.h>

#include <security/mac/mac_framework.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/gjournal.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#include <vm/vm.h>
#include <vm/uma.h>
#include <vm/vm_page.h>

#include <geom/geom.h>
#include <geom/geom_vfs.h>

static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;

static int	ffs_reload(struct mount *, struct thread *);
static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
		    ufs2_daddr_t);
static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
static vfs_init_t ffs_init;
static vfs_uninit_t ffs_uninit;
static vfs_extattrctl_t ffs_extattrctl;
static vfs_cmount_t ffs_cmount;
static vfs_unmount_t ffs_unmount;
static vfs_mount_t ffs_mount;
static vfs_statfs_t ffs_statfs;
static vfs_fhtovp_t ffs_fhtovp;
static vfs_vptofh_t ffs_vptofh;
static vfs_sync_t ffs_sync;

static struct vfsops ufs_vfsops = {
	.vfs_extattrctl =	ffs_extattrctl,
	.vfs_fhtovp =		ffs_fhtovp,
	.vfs_init =		ffs_init,
	.vfs_mount =		ffs_mount,
	.vfs_cmount =		ffs_cmount,
	.vfs_quotactl =		ufs_quotactl,
	.vfs_root =		ufs_root,
	.vfs_statfs =		ffs_statfs,
	.vfs_sync =		ffs_sync,
	.vfs_uninit =		ffs_uninit,
	.vfs_unmount =		ffs_unmount,
	.vfs_vget =		ffs_vget,
	.vfs_vptofh =		ffs_vptofh,
};

VFS_SET(ufs_vfsops, ufs, 0);
MODULE_VERSION(ufs, 1);

static b_strategy_t ffs_geom_strategy;
static b_write_t ffs_bufwrite;

static struct buf_ops ffs_ops = {
	.bop_name =	"FFS",
	.bop_write =	ffs_bufwrite,
	.bop_strategy =	ffs_geom_strategy,
	.bop_sync =	bufsync,
};

static const char *ffs_opts[] = { "acls", "async", "atime", "clusterr",
    "clusterw", "exec", "export", "force", "from", "multilabel",
    "snapshot", "suid", "suiddir", "symfollow", "sync",
    "union", NULL };

static int
ffs_mount(struct mount *mp, struct thread *td)
{
	struct vnode *devvp;
	struct ufsmount *ump = 0;
	struct fs *fs;
	int error, flags;
	u_int mntorflags, mntandnotflags;
	mode_t accessmode;
	struct nameidata ndp;
	char *fspec;

	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
		return (EINVAL);
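	/*
	 * First mount: create the UMA zones used by all FFS mounts.
	 */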
	if (uma_inode == NULL) {
		uma_inode = uma_zcreate("FFS inode",
		    sizeof(struct inode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs1 = uma_zcreate("FFS1 dinode",
		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs2 = uma_zcreate("FFS2 dinode",
		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
	}

	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
	if (error)
		return (error);

	mntorflags = 0;
	mntandnotflags = 0;
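	/*
	 * Translate the textual mount options into MNT_* flag bits:
	 * most options OR a flag into mntorflags, while "noasync"
	 * collects in mntandnotflags so the flag update below can set
	 * and clear bits under a single interlock acquisition.
	 */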
	if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
		mntorflags |= MNT_ACLS;

	if (vfs_getopt(mp->mnt_optnew, "async", NULL, NULL) == 0)
		mntorflags |= MNT_ASYNC;

	if (vfs_getopt(mp->mnt_optnew, "force", NULL, NULL) == 0)
		mntorflags |= MNT_FORCE;

	if (vfs_getopt(mp->mnt_optnew, "multilabel", NULL, NULL) == 0)
		mntorflags |= MNT_MULTILABEL;

	if (vfs_getopt(mp->mnt_optnew, "noasync", NULL, NULL) == 0)
		mntandnotflags |= MNT_ASYNC;

	if (vfs_getopt(mp->mnt_optnew, "noatime", NULL, NULL) == 0)
		mntorflags |= MNT_NOATIME;

	if (vfs_getopt(mp->mnt_optnew, "noclusterr", NULL, NULL) == 0)
		mntorflags |= MNT_NOCLUSTERR;

	if (vfs_getopt(mp->mnt_optnew, "noclusterw", NULL, NULL) == 0)
		mntorflags |= MNT_NOCLUSTERW;

	if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0)
		mntorflags |= MNT_SNAPSHOT;

	MNT_ILOCK(mp);
	mp->mnt_flag = (mp->mnt_flag | mntorflags) & ~mntandnotflags;
	MNT_IUNLOCK(mp);
	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		devvp = ump->um_devvp;
		if (fs->fs_ronly == 0 &&
		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			/*
			 * Flush any dirty data.
			 */
			if ((error = ffs_sync(mp, MNT_WAIT, td)) != 0) {
				vn_finished_write(mp);
				return (error);
			}
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (mp->mnt_flag & MNT_SOFTDEP) {
				error = softdep_flushfiles(mp, flags, td);
			} else {
				error = ffs_flushfiles(mp, flags, td);
			}
			if (error) {
				vn_finished_write(mp);
				return (error);
			}
			if (fs->fs_pendingblocks != 0 ||
			    fs->fs_pendinginodes != 0) {
				printf("%s: %s: blocks %jd files %d\n",
				    fs->fs_fsmnt, "update error",
				    (intmax_t)fs->fs_pendingblocks,
				    fs->fs_pendinginodes);
				fs->fs_pendingblocks = 0;
				fs->fs_pendinginodes = 0;
			}
			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
				fs->fs_clean = 1;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				fs->fs_ronly = 0;
				fs->fs_clean = 0;
				vn_finished_write(mp);
				return (error);
			}
			vn_finished_write(mp);
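			/*
			 * Downgrade complete; drop the write count on the
			 * GEOM consumer (the middle g_access() argument),
			 * keeping read access.
			 */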
			DROP_GIANT();
			g_topology_lock();
			g_access(ump->um_cp, 0, -1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
			fs->fs_ronly = 1;
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_RDONLY;
			MNT_IUNLOCK(mp);
		}
		if ((mp->mnt_flag & MNT_RELOAD) &&
		    (error = ffs_reload(mp, td)) != 0)
			return (error);
		if (fs->fs_ronly &&
		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
			error = VOP_ACCESS(devvp, VREAD | VWRITE,
			    td->td_ucred, td);
			if (error)
				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
			if (error) {
				VOP_UNLOCK(devvp, 0, td);
				return (error);
			}
			VOP_UNLOCK(devvp, 0, td);
			fs->fs_flags &= ~FS_UNCLEAN;
			if (fs->fs_clean == 0) {
				fs->fs_flags |= FS_UNCLEAN;
				if ((mp->mnt_flag & MNT_FORCE) ||
				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
				     (fs->fs_flags & FS_DOSOFTDEP))) {
					printf("WARNING: %s was not %s\n",
					   fs->fs_fsmnt, "properly dismounted");
				} else {
					printf(
"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
					    fs->fs_fsmnt);
					return (EPERM);
				}
			}
			DROP_GIANT();
			g_topology_lock();
			/*
			 * If we're the root device, we may not have an E count
			 * yet, get it now.
			 */
			if (ump->um_cp->ace == 0)
				error = g_access(ump->um_cp, 0, 1, 1);
			else
				error = g_access(ump->um_cp, 0, 1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
			if (error)
				return (error);
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			fs->fs_ronly = 0;
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_RDONLY;
			MNT_IUNLOCK(mp);
			fs->fs_clean = 0;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				vn_finished_write(mp);
				return (error);
			}
			/* check to see if we need to start softdep */
			if ((fs->fs_flags & FS_DOSOFTDEP) &&
			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
				vn_finished_write(mp);
				return (error);
			}
			if (fs->fs_snapinum[0] != 0)
				ffs_snapshot_mount(mp);
			vn_finished_write(mp);
		}
		/*
		 * Soft updates is incompatible with "async",
		 * so if we are doing softupdates stop the user
		 * from setting the async flag in an update.
		 * Softdep_mount() clears it in an initial mount
		 * or ro->rw remount.
		 */
		if (mp->mnt_flag & MNT_SOFTDEP) {
			/* XXX: Reset too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_ASYNC;
			MNT_IUNLOCK(mp);
		}
		/*
		 * Keep MNT_ACLS flag if it is stored in superblock.
		 */
		if ((fs->fs_flags & FS_ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * If this is a snapshot request, take the snapshot.
		 */
		if (mp->mnt_flag & MNT_SNAPSHOT)
			return (ffs_snapshot(mp, fspec));
	}

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible disk device.
	 */
	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
	if ((error = namei(&ndp)) != 0)
		return (error);
	NDFREE(&ndp, NDF_ONLY_PNBUF);
	devvp = ndp.ni_vp;
	if (!vn_isdisk(devvp, &error)) {
		vput(devvp);
		return (error);
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	accessmode = VREAD;
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		accessmode |= VWRITE;
	error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td);
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(devvp);
		return (error);
	}

	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update only
		 *
		 * If it's not the same vnode, or at least the same device,
		 * then it's not correct.
		 */

		if (devvp->v_rdev != ump->um_devvp->v_rdev)
			error = EINVAL;	/* needs translation */
		vput(devvp);
		if (error)
			return (error);
	} else {
		/*
		 * New mount
		 *
		 * We need the name for the mount point (also used for
		 * "last mounted on") copied in. If an error occurs,
		 * the mount point is discarded by the upper level code.
		 * Note that vfs_mount() populates f_mntonname for us.
		 */
		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
			vrele(devvp);
			return (error);
		}
	}
	vfs_mountedfrom(mp, fspec);
	return (0);
}

/*
 * Compatibility with old mount system call.
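 * The old mount(2) path hands us a userland struct ufs_args; convert it
 * to the nmount() option list form and let kernel_mount() do the rest.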
 */

static int
ffs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
{
	struct ufs_args args;
	int error;

	if (data == NULL)
		return (EINVAL);
	error = copyin(data, &args, sizeof args);
	if (error)
		return (error);

	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
	ma = mount_arg(ma, "export", &args.export, sizeof args.export);
	error = kernel_mount(ma, flags);

	return (error);
}

/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) re-read summary information from disk.
 *	4) invalidate all cached file data.
 *	5) re-read inode data for all active vnodes.
 */
static int
ffs_reload(struct mount *mp, struct thread *td)
{
	struct vnode *vp, *mvp, *devvp;
	struct inode *ip;
	void *space;
	struct buf *bp;
	struct fs *fs, *newfs;
	struct ufsmount *ump;
	ufs2_daddr_t sblockloc;
	int i, blks, size, error;
	int32_t *lp;

	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		return (EINVAL);
	ump = VFSTOUFS(mp);
	/*
	 * Step 1: invalidate all cached meta-data.
	 */
	devvp = VFSTOUFS(mp)->um_devvp;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
	if (vinvalbuf(devvp, 0, td, 0, 0) != 0)
		panic("ffs_reload: dirty1");
	VOP_UNLOCK(devvp, 0, td);

	/*
	 * Step 2: re-read superblock from disk.
	 */
	fs = VFSTOUFS(mp)->um_fs;
	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
	    NOCRED, &bp)) != 0)
		return (error);
	newfs = (struct fs *)bp->b_data;
	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
	     newfs->fs_magic != FS_UFS2_MAGIC) ||
	    newfs->fs_bsize > MAXBSIZE ||
	    newfs->fs_bsize < sizeof(struct fs)) {
			brelse(bp);
			return (EIO);		/* XXX needs translation */
	}
	/*
	 * Copy pointer fields back into superblock before copying in	XXX
	 * new superblock. These should really be in the ufsmount.	XXX
	 * Note that important parameters (eg fs_ncg) are unchanged.
	 */
	newfs->fs_csp = fs->fs_csp;
	newfs->fs_maxcluster = fs->fs_maxcluster;
	newfs->fs_contigdirs = fs->fs_contigdirs;
	newfs->fs_active = fs->fs_active;
	/* The file system is still read-only. */
	newfs->fs_ronly = 1;
	sblockloc = fs->fs_sblockloc;
	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
	brelse(bp);
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
	UFS_LOCK(ump);
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("%s: reload pending error: blocks %jd files %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);

	/*
	 * Step 3: re-read summary information from disk.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    NOCRED, &bp);
		if (error)
			return (error);
		bcopy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
		brelse(bp);
	}
	/*
	 * We no longer know anything about clusters per cylinder group.
	 */
	if (fs->fs_contigsumsize > 0) {
		lp = fs->fs_maxcluster;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
	}

loop:
	MNT_ILOCK(mp);
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			VI_UNLOCK(vp);
			continue;
		}
		MNT_IUNLOCK(mp);
		/*
		 * Step 4: invalidate all cached file data.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			MNT_VNODE_FOREACH_ABORT(mp, mvp);
			goto loop;
		}
		if (vinvalbuf(vp, 0, td, 0, 0))
			panic("ffs_reload: dirty2");
		/*
		 * Step 5: re-read inode data for all active vnodes.
		 */
		ip = VTOI(vp);
		error =
		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			VOP_UNLOCK(vp, 0, td);
			vrele(vp);
			MNT_VNODE_FOREACH_ABORT(mp, mvp);
			return (error);
		}
		ffs_load_inode(bp, ip, fs, ip->i_number);
		ip->i_effnlink = ip->i_nlink;
		brelse(bp);
		VOP_UNLOCK(vp, 0, td);
		vrele(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	return (0);
}

/*
 * Possible superblock locations ordered from most to least likely.
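 * (SBLOCKSEARCH in <ufs/ffs/fs.h> is expected to expand to the UFS2,
 * UFS1, floppy, and piggyback offsets, terminated by -1.)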
 */
static int sblock_try[] = SBLOCKSEARCH;

/*
 * Common code for mount and mountroot
 */
static int
ffs_mountfs(devvp, mp, td)
	struct vnode *devvp;
	struct mount *mp;
	struct thread *td;
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	struct cdev *dev;
	void *space;
	ufs2_daddr_t sblockloc;
	int error, i, blks, size, ronly;
	int32_t *lp;
	struct ucred *cred;
	struct g_consumer *cp;
	struct mount *nmp;

	dev = devvp->v_rdev;
	cred = td ? td->td_ucred : NOCRED;

	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
	DROP_GIANT();
	g_topology_lock();
	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);

	/*
	 * If we are a root mount, drop the E flag so fsck can do its magic.
	 * We will pick it up again when we remount R/W.
	 */
	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
		error = g_access(cp, 0, 0, -1);
	g_topology_unlock();
	PICKUP_GIANT();
	VOP_UNLOCK(devvp, 0, td);
	if (error)
		return (error);
	if (devvp->v_rdev->si_iosize_max != 0)
		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
	if (mp->mnt_iosize_max > MAXPHYS)
		mp->mnt_iosize_max = MAXPHYS;

	devvp->v_bufobj.bo_private = cp;
	devvp->v_bufobj.bo_ops = &ffs_ops;

	bp = NULL;
	ump = NULL;
	fs = NULL;
	sblockloc = 0;
	/*
	 * Try reading the superblock in each of its possible locations.
	 */
	for (i = 0; sblock_try[i] != -1; i++) {
		if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
			error = EINVAL;
			vfs_mount_error(mp,
			    "Invalid sectorsize %d for superblock size %d",
			    cp->provider->sectorsize, SBLOCKSIZE);
			goto out;
		}
		if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
		    cred, &bp)) != 0)
			goto out;
		fs = (struct fs *)bp->b_data;
		sblockloc = sblock_try[i];
		if ((fs->fs_magic == FS_UFS1_MAGIC ||
		     (fs->fs_magic == FS_UFS2_MAGIC &&
		      (fs->fs_sblockloc == sblockloc ||
		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
		    fs->fs_bsize <= MAXBSIZE &&
		    fs->fs_bsize >= sizeof(struct fs))
			break;
		brelse(bp);
		bp = NULL;
	}
	if (sblock_try[i] == -1) {
		error = EINVAL;		/* XXX needs translation */
		goto out;
	}
	fs->fs_fmod = 0;
	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indices */
	fs->fs_flags &= ~FS_UNCLEAN;
	if (fs->fs_clean == 0) {
		fs->fs_flags |= FS_UNCLEAN;
		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
		     (fs->fs_flags & FS_DOSOFTDEP))) {
			printf(
"WARNING: %s was not properly dismounted\n",
			    fs->fs_fsmnt);
		} else {
			printf(
"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
			    fs->fs_fsmnt);
			error = EPERM;
			goto out;
		}
		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
		    (mp->mnt_flag & MNT_FORCE)) {
			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
			    (intmax_t)fs->fs_pendingblocks,
			    fs->fs_pendinginodes);
			fs->fs_pendingblocks = 0;
			fs->fs_pendinginodes = 0;
		}
	}
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("%s: mount pending error: blocks %jd files %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	if ((fs->fs_flags & FS_GJOURNAL) != 0) {
#ifdef UFS_GJOURNAL
		/*
		 * Get journal provider name.
		 */
		size = 1024;
		mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
		if (g_io_getattr("GJOURNAL::provider", cp, &size,
		    mp->mnt_gjprovider) == 0) {
			mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
			    M_UFSMNT, M_WAITOK);
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_GJOURNAL;
			MNT_IUNLOCK(mp);
		} else {
			printf(
"WARNING: %s: GJOURNAL flag on fs but no gjournal provider below\n",
			    mp->mnt_stat.f_mntonname);
			free(mp->mnt_gjprovider, M_UFSMNT);
			mp->mnt_gjprovider = NULL;
		}
#else
		printf(
"WARNING: %s: GJOURNAL flag on fs but no UFS_GJOURNAL support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	} else {
		mp->mnt_gjprovider = NULL;
	}
	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
	ump->um_cp = cp;
	ump->um_bo = &devvp->v_bufobj;
	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		ump->um_fstype = UFS1;
		ump->um_balloc = ffs_balloc_ufs1;
	} else {
		ump->um_fstype = UFS2;
		ump->um_balloc = ffs_balloc_ufs2;
	}
	ump->um_blkatoff = ffs_blkatoff;
	ump->um_truncate = ffs_truncate;
	ump->um_update = ffs_update;
	ump->um_valloc = ffs_valloc;
	ump->um_vfree = ffs_vfree;
	ump->um_ifree = ffs_ifree;
	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBLOCKSIZE)
		bp->b_flags |= B_INVAL | B_NOCACHE;
	brelse(bp);
	bp = NULL;
	fs = ump->um_fs;
	ffs_oldfscompat_read(fs, ump, sblockloc);
	fs->fs_ronly = ronly;
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    cred, &bp)) != 0) {
			free(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		bcopy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
		brelse(bp);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	bzero(fs->fs_contigdirs, size);
	fs->fs_active = NULL;
	mp->mnt_data = (qaddr_t)ump;
	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
	nmp = NULL;
	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
	    (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
		if (nmp)
			vfs_rel(nmp);
		vfs_getnewfsid(mp);
	}
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_LOCAL;
	MNT_IUNLOCK(mp);
	if ((fs->fs_flags & FS_MULTILABEL) != 0) {
#ifdef MAC
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_MULTILABEL;
		MNT_IUNLOCK(mp);
#else
		printf(
"WARNING: %s: multilabel flag on fs but no MAC support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	}
	if ((fs->fs_flags & FS_ACLS) != 0) {
#ifdef UFS_ACL
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_ACLS;
		MNT_IUNLOCK(mp);
#else
		printf(
"WARNING: %s: ACLs flag on fs but no ACLs support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	}
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		ump->um_quotas[i] = NULLVP;
#ifdef UFS_EXTATTR
	ufs_extattr_uepm_init(&ump->um_extattr);
#endif
	/*
	 * Set FS local "last mounted on" information (NULL pad)
	 */
	bzero(fs->fs_fsmnt, MAXMNTLEN);
	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);

	if (mp->mnt_flag & MNT_ROOTFS) {
		/*
		 * Root mount; update timestamp in mount structure.
		 * this will be used by the common root mount code
		 * to update the system clock.
		 */
		mp->mnt_time = fs->fs_time;
	}

	if (ronly == 0) {
		if ((fs->fs_flags & FS_DOSOFTDEP) &&
		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
			free(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		if (fs->fs_snapinum[0] != 0)
			ffs_snapshot_mount(mp);
		fs->fs_fmod = 1;
		fs->fs_clean = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT, 0);
	}
	/*
	 * Initialize filesystem stat information in mount struct.
	 */
#ifdef UFS_EXTATTR
#ifdef UFS_EXTATTR_AUTOSTART
	/*
	 * Auto-starting does the following:
	 *	- check for /.attribute in the fs, and extattr_start if so
	 *	- for each file in .attribute, enable that file with
	 *	  an attribute of the same name.
	 * Not clear how to report errors -- probably eat them.
	 * This would all happen while the filesystem was busy/not
	 * available, so would effectively be "atomic".
	 */
	(void) ufs_extattr_autostart(mp, td);
#endif /* !UFS_EXTATTR_AUTOSTART */
#endif /* !UFS_EXTATTR */
#ifdef QUOTA
	/*
	 * Our bufobj must require giant for snapshots when quotas are
	 * enabled.
	 */
	BO_LOCK(&devvp->v_bufobj);
	devvp->v_bufobj.bo_flag |= BO_NEEDSGIANT;
	BO_UNLOCK(&devvp->v_bufobj);
#else
	MNT_ILOCK(mp);
	mp->mnt_kern_flag |= MNTK_MPSAFE;
	MNT_IUNLOCK(mp);
#endif
	return (0);
out:
	if (bp)
		brelse(bp);
	if (cp != NULL) {
		DROP_GIANT();
		g_topology_lock();
		g_vfs_close(cp, td);
		g_topology_unlock();
		PICKUP_GIANT();
	}
	if (ump) {
		mtx_destroy(UFS_MTX(ump));
		if (mp->mnt_gjprovider != NULL) {
			free(mp->mnt_gjprovider, M_UFSMNT);
			mp->mnt_gjprovider = NULL;
		}
		free(ump->um_fs, M_UFSMNT);
		free(ump, M_UFSMNT);
		mp->mnt_data = (qaddr_t)0;
	}
	return (error);
}

#include <sys/sysctl.h>
static int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");

/*
 * Sanity checks for loading old filesystem superblocks.
 * See ffs_oldfscompat_write below for unwound actions.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 */
static void
ffs_oldfscompat_read(fs, ump, sblockloc)
	struct fs *fs;
	struct ufsmount *ump;
	ufs2_daddr_t sblockloc;
{
	off_t maxfilesize;

	/*
	 * If not yet done, update fs_flags location and value of fs_sblockloc.
	 */
	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
		fs->fs_flags = fs->fs_old_flags;
		fs->fs_old_flags |= FS_FLAGS_UPDATED;
		fs->fs_sblockloc = sblockloc;
	}
	/*
	 * If not yet done, update UFS1 superblock with new wider fields.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
		fs->fs_maxbsize = fs->fs_bsize;
		fs->fs_time = fs->fs_old_time;
		fs->fs_size = fs->fs_old_size;
		fs->fs_dsize = fs->fs_old_dsize;
		fs->fs_csaddr = fs->fs_old_csaddr;
		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
	}
	if (fs->fs_magic == FS_UFS1_MAGIC &&
	    fs->fs_old_inodefmt < FS_44INODEFMT) {
		fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
		fs->fs_qbmask = ~fs->fs_bmask;
		fs->fs_qfmask = ~fs->fs_fmask;
	}
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
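		/*
		 * Limit the file size to what UFS1's 32-bit on-disk
		 * block pointers can address: 2^31 blocks of fs_bsize
		 * bytes.
		 */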
		maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
		if (fs->fs_maxfilesize > maxfilesize)
			fs->fs_maxfilesize = maxfilesize;
	}
	/* Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	if (bigcgs) {
		fs->fs_save_cgsize = fs->fs_cgsize;
		fs->fs_cgsize = fs->fs_bsize;
	}
}

/*
 * Unwinding superblock updates for old filesystems.
 * See ffs_oldfscompat_read above for details.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 */
static void
ffs_oldfscompat_write(fs, ump)
	struct fs *fs;
	struct ufsmount *ump;
{

	/*
	 * Copy back UFS2 updated fields that UFS1 inspects.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		fs->fs_old_time = fs->fs_time;
		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
	}
	if (bigcgs) {
		fs->fs_cgsize = fs->fs_save_cgsize;
		fs->fs_save_cgsize = 0;
	}
}

/*
 * unmount system call
 */
static int
ffs_unmount(mp, mntflags, td)
	struct mount *mp;
	int mntflags;
	struct thread *td;
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, flags;

	flags = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
	}
#ifdef UFS_EXTATTR
	if ((error = ufs_extattr_stop(mp, td))) {
		if (error != EOPNOTSUPP)
			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
			    error);
	} else {
		ufs_extattr_uepm_destroy(&ump->um_extattr);
	}
#endif
	if (mp->mnt_flag & MNT_SOFTDEP) {
		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
			return (error);
	} else {
		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
			return (error);
	}
	fs = ump->um_fs;
	UFS_LOCK(ump);
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("%s: unmount pending error: blocks %jd files %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);
	if (fs->fs_ronly == 0) {
		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
		error = ffs_sbupdate(ump, MNT_WAIT, 0);
		if (error) {
			fs->fs_clean = 0;
			return (error);
		}
	}
	DROP_GIANT();
	g_topology_lock();
	g_vfs_close(ump->um_cp, td);
	g_topology_unlock();
	PICKUP_GIANT();
	vrele(ump->um_devvp);
	mtx_destroy(UFS_MTX(ump));
	if (mp->mnt_gjprovider != NULL) {
		free(mp->mnt_gjprovider, M_UFSMNT);
		mp->mnt_gjprovider = NULL;
	}
	free(fs->fs_csp, M_UFSMNT);
	free(fs, M_UFSMNT);
	free(ump, M_UFSMNT);
	mp->mnt_data = (qaddr_t)0;
	MNT_ILOCK(mp);
	mp->mnt_flag &= ~MNT_LOCAL;
	MNT_IUNLOCK(mp);
	return (error);
}

/*
 * Flush out all the files in a filesystem.
 */
int
ffs_flushfiles(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	struct ufsmount *ump;
	int error;

	ump = VFSTOUFS(mp);
#ifdef QUOTA
	if (mp->mnt_flag & MNT_QUOTA) {
		int i;
		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
		if (error)
			return (error);
		for (i = 0; i < MAXQUOTAS; i++) {
			if (ump->um_quotas[i] == NULLVP)
				continue;
			quotaoff(td, mp, i);
		}
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}
#endif
	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
			return (error);
		ffs_snapshot_unmount(mp);
		flags |= FORCECLOSE;
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}
	/*
	 * Flush all the files.
	 */
	if ((error = vflush(mp, 0, flags, td)) != 0)
		return (error);
	/*
	 * Flush filesystem metadata.
	 */
	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
	VOP_UNLOCK(ump->um_devvp, 0, td);
	return (error);
}

/*
 * Get filesystem statistics.
 */
static int
ffs_statfs(mp, sbp, td)
	struct mount *mp;
	struct statfs *sbp;
	struct thread *td;
{
	struct ufsmount *ump;
	struct fs *fs;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_statfs");
	sbp->f_version = STATFS_VERSION;
	sbp->f_bsize = fs->fs_fsize;
	sbp->f_iosize = fs->fs_bsize;
	sbp->f_blocks = fs->fs_dsize;
	UFS_LOCK(ump);
	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
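	/*
	 * freespace() already deducts the minfree reserve, so f_bavail
	 * can go negative when root has dipped into the reserve.
	 */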
	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
	    dbtofsb(fs, fs->fs_pendingblocks);
	sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
	UFS_UNLOCK(ump);
	sbp->f_namemax = NAME_MAX;
	return (0);
}

/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 */
static int
ffs_sync(mp, waitfor, td)
	struct mount *mp;
	int waitfor;
	struct thread *td;
{
	struct vnode *mvp, *vp, *devvp;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, count, wait, lockreq, allerror = 0;
	int suspend;
	int suspended;
	int secondary_writes;
	int secondary_accwrites;
	int softdep_deps;
	int softdep_accdeps;
	struct bufobj *bo;

	fs = ump->um_fs;
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("ffs_sync: rofs mod");
	}
	/*
	 * Write back each (modified) inode.
	 */
	wait = 0;
	suspend = 0;
	suspended = 0;
	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
	if (waitfor == MNT_SUSPEND) {
		suspend = 1;
		waitfor = MNT_WAIT;
	}
	if (waitfor == MNT_WAIT) {
		wait = 1;
		lockreq = LK_EXCLUSIVE;
	}
	lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
	MNT_ILOCK(mp);
loop:
	/* Grab snapshot of secondary write counts */
	secondary_writes = mp->mnt_secondary_writes;
	secondary_accwrites = mp->mnt_secondary_accwrites;

	/* Grab snapshot of softdep dependency counts */
	MNT_IUNLOCK(mp);
	softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
	MNT_ILOCK(mp);

	MNT_VNODE_FOREACH(vp, mp, mvp) {
		/*
		 * Depend on the mntvnode_slock to keep things stable enough
		 * for a quick test.  Since there might be hundreds of
		 * thousands of vnodes, we cannot afford even a subroutine
		 * call unless there's a good chance that we have work to do.
		 */
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			VI_UNLOCK(vp);
			continue;
		}
		ip = VTOI(vp);
		if (vp->v_type == VNON || ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
			VI_UNLOCK(vp);
			continue;
		}
		MNT_IUNLOCK(mp);
		if ((error = vget(vp, lockreq, td)) != 0) {
			MNT_ILOCK(mp);
			if (error == ENOENT || error == ENOLCK) {
				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
				goto loop;
			}
			continue;
		}
		if ((error = ffs_syncvnode(vp, waitfor)) != 0)
			allerror = error;
		vput(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	/*
	 * Force stale filesystem control information to be flushed.
	 */
	if (waitfor == MNT_WAIT) {
		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
			allerror = error;
		/* Flushed work items may create new vnodes to clean */
		if (allerror == 0 && count) {
			MNT_ILOCK(mp);
			goto loop;
		}
	}
#ifdef QUOTA
	qsync(mp);
#endif
	devvp = ump->um_devvp;
	VI_LOCK(devvp);
	bo = &devvp->v_bufobj;
	if (waitfor != MNT_LAZY &&
	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(devvp, 0, td);
		if (allerror == 0 && waitfor == MNT_WAIT) {
			MNT_ILOCK(mp);
			goto loop;
		}
	} else if (suspend != 0) {
		if (softdep_check_suspend(mp,
					  devvp,
					  softdep_deps,
					  softdep_accdeps,
					  secondary_writes,
					  secondary_accwrites) != 0)
			goto loop;	/* More work needed */
		mtx_assert(MNT_MTX(mp), MA_OWNED);
		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
		MNT_IUNLOCK(mp);
		suspended = 1;
	} else
		VI_UNLOCK(devvp);
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 &&
	    (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
		allerror = error;
	return (allerror);
}

int
ffs_vget(mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct cdev *dev;
	int error;

	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/*
	 * We must promote to an exclusive lock for vnode creation.  This
	 * can happen if lookup is passed LOCKSHARED.
	 */
	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
		flags &= ~LK_TYPE_MASK;
		flags |= LK_EXCLUSIVE;
	}

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such rare case as simultaneous creation of vnode
	 * for same ino by different processes. We just allow them to race
	 * and check later to decide who wins. Let the race begin!
	 */

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	fs = ump->um_fs;

	/*
	 * If this MALLOC() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);

	/* Allocate a new vnode/inode. */
	if (fs->fs_magic == FS_UFS1_MAGIC)
		error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
	else
		error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
	if (error) {
		*vpp = NULL;
		uma_zfree(uma_inode, ip);
		return (error);
	}
	/*
	 * FFS supports recursive and shared locking.
	 */
	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
	vp->v_vnlock->lk_flags &= ~LK_NOSHARE;
	vp->v_data = ip;
	vp->v_bufobj.bo_bsize = fs->fs_bsize;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif

	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
	else
		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	if (ip->i_ump->um_fstype == UFS1)
		error = ufs_vinit(mp, &ffs_fifoops1, &vp);
	else
		error = ufs_vinit(mp, &ffs_fifoops2, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/*
	 * Finish inode initialization.
	 */

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
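		/*
		 * Pick a random nonzero value; generation numbers allow
		 * stale NFS file handles to be detected after an inode
		 * is reused.
		 */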
		ip->i_gen = arc4random() / 2 + 1;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			ip->i_flag |= IN_MODIFIED;
			DIP_SET(ip, i_gen, ip->i_gen);
		}
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
	}						/* XXX */

#ifdef MAC
	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
		/*
		 * If this vnode is already allocated, and we're running
		 * multi-label, attempt to perform a label association
		 * from the extended attributes on the inode.
		 */
		error = mac_associate_vnode_extattr(mp, vp);
		if (error) {
			/* ufs_inactive will release ip->i_devvp ref. */
			vput(vp);
			*vpp = NULL;
			return (error);
		}
	}
#endif

	*vpp = vp;
	return (0);
}

/*
 * File handle to vnode
 *
 * Have to be really careful about stale file handles:
 * - check that the inode number is valid
 * - call ffs_vget() to get the locked inode
 * - check for an unallocated inode (i_mode == 0)
 * - check that the given client host has export rights and return
 *   those rights via exflagsp and credanonp
 */
static int
ffs_fhtovp(mp, fhp, vpp)
	struct mount *mp;
	struct fid *fhp;
	struct vnode **vpp;
{
	struct ufid *ufhp;
	struct fs *fs;

	ufhp = (struct ufid *)fhp;
	fs = VFSTOUFS(mp)->um_fs;
	if (ufhp->ufid_ino < ROOTINO ||
	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
		return (ESTALE);
	return (ufs_fhtovp(mp, ufhp, vpp));
}

/*
 * Vnode pointer to File handle
 */
/* ARGSUSED */
static int
ffs_vptofh(vp, fhp)
	struct vnode *vp;
	struct fid *fhp;
{
	struct inode *ip;
	struct ufid *ufhp;

	ip = VTOI(vp);
	ufhp = (struct ufid *)fhp;
	ufhp->ufid_len = sizeof(struct ufid);
	ufhp->ufid_ino = ip->i_number;
	ufhp->ufid_gen = ip->i_gen;
	return (0);
}

/*
 * Initialize the filesystem.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{

	softdep_initialize();
	return (ufs_init(vfsp));
}

/*
 * Undo the work of ffs_init().
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int ret;

	ret = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (ret);
}

/*
 * Write a superblock and associated information back to disk.
 */
int
ffs_sbupdate(mp, waitfor, suspended)
	struct ufsmount *mp;
	int waitfor;
	int suspended;
{
	struct fs *fs = mp->um_fs;
	struct buf *sbbp;
	struct buf *bp;
	int blks;
	void *space;
	int i, size, error, allerror = 0;

	if (fs->fs_ronly == 1 &&
	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
	    (MNT_RDONLY | MNT_UPDATE))
		panic("ffs_sbupdate: write read-only filesystem");
	/*
	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
	 */
	sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
	    0, 0, 0);
	/*
	 * First write back the summary information.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
		    size, 0, 0, 0);
		bcopy(space, bp->b_data, (u_int)size);
		space = (char *)space + size;
		if (suspended)
			bp->b_flags |= B_VALIDSUSPWRT;
		if (waitfor != MNT_WAIT)
			bawrite(bp);
		else if ((error = bwrite(bp)) != 0)
			allerror = error;
	}
	/*
	 * Now write back the superblock itself. If any errors occurred
	 * up to this point, then fail so that the superblock avoids
	 * being written out as clean.
	 */
	if (allerror) {
		brelse(sbbp);
		return (allerror);
	}
	bp = sbbp;
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
		printf("%s: correcting fs_sblockloc from %jd to %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_sblockloc, SBLOCK_UFS1);
		fs->fs_sblockloc = SBLOCK_UFS1;
	}
	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
		printf("%s: correcting fs_sblockloc from %jd to %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_sblockloc, SBLOCK_UFS2);
		fs->fs_sblockloc = SBLOCK_UFS2;
	}
	fs->fs_fmod = 0;
	fs->fs_time = time_second;
	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
	if (suspended)
		bp->b_flags |= B_VALIDSUSPWRT;
	if (waitfor != MNT_WAIT)
		bawrite(bp);
	else if ((error = bwrite(bp)) != 0)
		allerror = error;
	return (allerror);
}

static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{

#ifdef UFS_EXTATTR
	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td));
#else
	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td));
#endif
}

static void
ffs_ifree(struct ufsmount *ump, struct inode *ip)
{

	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
		uma_zfree(uma_ufs1, ip->i_din1);
	else if (ip->i_din2 != NULL)
		uma_zfree(uma_ufs2, ip->i_din2);
	uma_zfree(uma_inode, ip);
}

static int dobkgrdwrite = 1;
SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
    "Do background writes (honoring the BV_BKGRDWRITE flag)?");

/*
 * Complete a background write started from bwrite.
 */
static void
ffs_backgroundwritedone(struct buf *bp)
{
	struct bufobj *bufobj;
	struct buf *origbp;

	/*
	 * Find the original buffer that we are writing.
	 */
	bufobj = bp->b_bufobj;
	BO_LOCK(bufobj);
	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
		panic("backgroundwritedone: lost buffer");
	/* Grab an extra reference to be dropped by the bufdone() below. */
	bufobj_wrefl(bufobj);
	BO_UNLOCK(bufobj);
	/*
	 * Process dependencies then return any unfinished ones.
	 */
	if (LIST_FIRST(&bp->b_dep) != NULL)
		buf_complete(bp);
#ifdef SOFTUPDATES
	if (LIST_FIRST(&bp->b_dep) != NULL)
		softdep_move_dependencies(bp, origbp);
#endif
	/*
	 * This buffer is marked B_NOCACHE so when it is released
	 * by biodone it will be tossed.
	 */
	bp->b_flags |= B_NOCACHE;
	bp->b_flags &= ~B_CACHE;
	bufdone(bp);
	BO_LOCK(bufobj);
	/*
	 * Clear the BV_BKGRDINPROG flag in the original buffer
	 * and awaken it if it is waiting for the write to complete.
	 * If BV_BKGRDINPROG is not set in the original buffer it must
	 * have been released and re-instantiated - which is not legal.
	 */
	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
	    ("backgroundwritedone: lost buffer2"));
	origbp->b_vflags &= ~BV_BKGRDINPROG;
	if (origbp->b_vflags & BV_BKGRDWAIT) {
		origbp->b_vflags &= ~BV_BKGRDWAIT;
		wakeup(&origbp->b_xflags);
	}
	BO_UNLOCK(bufobj);
}


/*
 * Write, release buffer on completion.  (Done by iodone
 * if async).  Do not bother writing anything if the buffer
 * is invalid.
 *
 * Note that we set B_CACHE here, indicating that buffer is
 * fully valid and thus cacheable.  This is true even of NFS
 * now so we set it generally.  This could be set either here
 * or in biodone() since the I/O is synchronous.  We put it
 * here.
 */
static int
ffs_bufwrite(struct buf *bp)
{
	int oldflags, s;
	struct buf *newbp;

	CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	}

	oldflags = bp->b_flags;

	if (BUF_REFCNT(bp) == 0)
		panic("bufwrite: buffer is not busy???");
	s = splbio();
	/*
	 * If a background write is already in progress, delay
	 * writing this block if it is asynchronous. Otherwise
	 * wait for the background write to complete.
	 */
	BO_LOCK(bp->b_bufobj);
	if (bp->b_vflags & BV_BKGRDINPROG) {
		if (bp->b_flags & B_ASYNC) {
			BO_UNLOCK(bp->b_bufobj);
			splx(s);
			bdwrite(bp);
			return (0);
		}
		bp->b_vflags |= BV_BKGRDWAIT;
		msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
		if (bp->b_vflags & BV_BKGRDINPROG)
			panic("bufwrite: still writing");
	}
	BO_UNLOCK(bp->b_bufobj);

	/* Mark the buffer clean */
	bundirty(bp);

	/*
	 * If this buffer is marked for background writing and we
	 * do not have to wait for it, make a copy and write the
	 * copy so as to leave this buffer ready for further use.
	 *
	 * This optimization eats a lot of memory.  If we have a page
	 * or buffer shortfall we can't do it.
	 */
	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
	    (bp->b_flags & B_ASYNC) &&
	    !vm_page_count_severe() &&
	    !buf_dirty_count_severe()) {
		KASSERT(bp->b_iodone == NULL,
		    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));

		/* get a new block */
		newbp = geteblk(bp->b_bufsize);

		/*
		 * set it to be identical to the old block.  We have to
		 * set b_lblkno and BKGRDMARKER before calling bgetvp()
		 * to avoid confusing the splay tree and gbincore().
		 */
		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
		newbp->b_lblkno = bp->b_lblkno;
		newbp->b_xflags |= BX_BKGRDMARKER;
		BO_LOCK(bp->b_bufobj);
		bp->b_vflags |= BV_BKGRDINPROG;
		bgetvp(bp->b_vp, newbp);
		BO_UNLOCK(bp->b_bufobj);
		newbp->b_bufobj = &bp->b_vp->v_bufobj;
		newbp->b_blkno = bp->b_blkno;
		newbp->b_offset = bp->b_offset;
		newbp->b_iodone = ffs_backgroundwritedone;
		newbp->b_flags |= B_ASYNC;
		newbp->b_flags &= ~B_INVAL;

#ifdef SOFTUPDATES
		/* move over the dependencies */
		if (LIST_FIRST(&bp->b_dep) != NULL)
			softdep_move_dependencies(bp, newbp);
#endif

		/*
		 * Initiate write on the copy, release the original to
		 * the B_LOCKED queue so that it cannot go away until
		 * the background write completes. If not locked it could go
		 * away and then be reconstituted while it was being written.
		 * If the reconstituted buffer were written, we could end up
		 * with two background copies being written at the same time.
		 */
		bqrelse(bp);
		bp = newbp;
	}

	/* Let the normal bufwrite do the rest for us */
	return (bufwrite(bp));
}


static void
ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
{
	struct vnode *vp;
	int error;
	struct buf *tbp;

	vp = bo->__bo_vnode;
	if (bp->b_iocmd == BIO_WRITE) {
		if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
			panic("ffs_geom_strategy: bad I/O");
		bp->b_flags &= ~B_VALIDSUSPWRT;
		if ((vp->v_vflag & VV_COPYONWRITE) &&
		    vp->v_rdev->si_snapdata != NULL) {
			if ((bp->b_flags & B_CLUSTER) != 0) {
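				/*
				 * ffs_copyonwrite() may sleep, so take this
				 * buffer out of the runningbufspace account
				 * for the duration; it is re-added below.
				 */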
				runningbufwakeup(bp);
				TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
					      b_cluster.cluster_entry) {
					error = ffs_copyonwrite(vp, tbp);
					if (error != 0 &&
					    error != EOPNOTSUPP) {
						bp->b_error = error;
						bp->b_ioflags |= BIO_ERROR;
						bufdone(bp);
						return;
					}
				}
				bp->b_runningbufspace = bp->b_bufsize;
				atomic_add_int(&runningbufspace,
					       bp->b_runningbufspace);
			} else {
				error = ffs_copyonwrite(vp, bp);
				if (error != 0 && error != EOPNOTSUPP) {
					bp->b_error = error;
					bp->b_ioflags |= BIO_ERROR;
					bufdone(bp);
					return;
				}
			}
		}
#ifdef SOFTUPDATES
		if ((bp->b_flags & B_CLUSTER) != 0) {
			TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
				      b_cluster.cluster_entry) {
				if (LIST_FIRST(&tbp->b_dep) != NULL)
					buf_start(tbp);
			}
		} else {
			if (LIST_FIRST(&bp->b_dep) != NULL)
				buf_start(bp);
		}

#endif
	}
	g_vfs_strategy(bo, bp);
}