ffs_vfsops.c revision 163841
1/*-
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 163841 2006-10-31 21:48:54Z pjd $");
34
35#include "opt_mac.h"
36#include "opt_quota.h"
37#include "opt_ufs.h"
38#include "opt_ffs.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/namei.h>
43#include <sys/proc.h>
44#include <sys/kernel.h>
45#include <sys/vnode.h>
46#include <sys/mount.h>
47#include <sys/bio.h>
48#include <sys/buf.h>
49#include <sys/conf.h>
50#include <sys/fcntl.h>
51#include <sys/malloc.h>
52#include <sys/mutex.h>
53
54#include <security/mac/mac_framework.h>
55
56#include <ufs/ufs/extattr.h>
57#include <ufs/ufs/gjournal.h>
58#include <ufs/ufs/quota.h>
59#include <ufs/ufs/ufsmount.h>
60#include <ufs/ufs/inode.h>
61#include <ufs/ufs/ufs_extern.h>
62
63#include <ufs/ffs/fs.h>
64#include <ufs/ffs/ffs_extern.h>
65
66#include <vm/vm.h>
67#include <vm/uma.h>
68#include <vm/vm_page.h>
69
70#include <geom/geom.h>
71#include <geom/geom_vfs.h>
72
73static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
74
75static int	ffs_reload(struct mount *, struct thread *);
76static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
77static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
78		    ufs2_daddr_t);
79static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
80static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
81static vfs_init_t ffs_init;
82static vfs_uninit_t ffs_uninit;
83static vfs_extattrctl_t ffs_extattrctl;
84static vfs_cmount_t ffs_cmount;
85static vfs_unmount_t ffs_unmount;
86static vfs_mount_t ffs_mount;
87static vfs_statfs_t ffs_statfs;
88static vfs_fhtovp_t ffs_fhtovp;
89static vfs_vptofh_t ffs_vptofh;
90static vfs_sync_t ffs_sync;
91
92static struct vfsops ufs_vfsops = {
93	.vfs_extattrctl =	ffs_extattrctl,
94	.vfs_fhtovp =		ffs_fhtovp,
95	.vfs_init =		ffs_init,
96	.vfs_mount =		ffs_mount,
97	.vfs_cmount =		ffs_cmount,
98	.vfs_quotactl =		ufs_quotactl,
99	.vfs_root =		ufs_root,
100	.vfs_statfs =		ffs_statfs,
101	.vfs_sync =		ffs_sync,
102	.vfs_uninit =		ffs_uninit,
103	.vfs_unmount =		ffs_unmount,
104	.vfs_vget =		ffs_vget,
105	.vfs_vptofh =		ffs_vptofh,
106};
107
108VFS_SET(ufs_vfsops, ufs, 0);
109MODULE_VERSION(ufs, 1);
110
111static b_strategy_t ffs_geom_strategy;
112static b_write_t ffs_bufwrite;
113
114static struct buf_ops ffs_ops = {
115	.bop_name =	"FFS",
116	.bop_write =	ffs_bufwrite,
117	.bop_strategy =	ffs_geom_strategy,
118	.bop_sync =	bufsync,
119};
120
121static const char *ffs_opts[] = { "acls", "async", "atime", "clusterr",
122    "clusterw", "exec", "export", "force", "from", "multilabel",
123    "snapshot", "suid", "suiddir", "symfollow", "sync",
124    "union", NULL };
125
126static int
127ffs_mount(struct mount *mp, struct thread *td)
128{
129	struct vnode *devvp;
130	struct ufsmount *ump = 0;
131	struct fs *fs;
132	int error, flags;
133	u_int mntorflags, mntandnotflags;
134	mode_t accessmode;
135	struct nameidata ndp;
136	char *fspec;
137
138	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
139		return (EINVAL);
140	if (uma_inode == NULL) {
141		uma_inode = uma_zcreate("FFS inode",
142		    sizeof(struct inode), NULL, NULL, NULL, NULL,
143		    UMA_ALIGN_PTR, 0);
144		uma_ufs1 = uma_zcreate("FFS1 dinode",
145		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
146		    UMA_ALIGN_PTR, 0);
147		uma_ufs2 = uma_zcreate("FFS2 dinode",
148		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
149		    UMA_ALIGN_PTR, 0);
150	}
151
152	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
153	if (error)
154		return (error);
155
156	mntorflags = 0;
157	mntandnotflags = 0;
158	if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
159		mntorflags |= MNT_ACLS;
160
161	if (vfs_getopt(mp->mnt_optnew, "async", NULL, NULL) == 0)
162		mntorflags |= MNT_ASYNC;
163
164	if (vfs_getopt(mp->mnt_optnew, "force", NULL, NULL) == 0)
165		mntorflags |= MNT_FORCE;
166
167	if (vfs_getopt(mp->mnt_optnew, "multilabel", NULL, NULL) == 0)
168		mntorflags |= MNT_MULTILABEL;
169
170	if (vfs_getopt(mp->mnt_optnew, "noasync", NULL, NULL) == 0)
171		mntandnotflags |= MNT_ASYNC;
172
173	if (vfs_getopt(mp->mnt_optnew, "noatime", NULL, NULL) == 0)
174		mntorflags |= MNT_NOATIME;
175
176	if (vfs_getopt(mp->mnt_optnew, "noclusterr", NULL, NULL) == 0)
177		mntorflags |= MNT_NOCLUSTERR;
178
179	if (vfs_getopt(mp->mnt_optnew, "noclusterw", NULL, NULL) == 0)
180		mntorflags |= MNT_NOCLUSTERW;
181
182	if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0)
183		mntorflags |= MNT_SNAPSHOT;
184
185	MNT_ILOCK(mp);
186	mp->mnt_flag = (mp->mnt_flag | mntorflags) & ~mntandnotflags;
187	MNT_IUNLOCK(mp);
188	/*
189	 * If updating, check whether changing from read-only to
190	 * read/write; if there is no device name, that's all we do.
191	 */
192	if (mp->mnt_flag & MNT_UPDATE) {
193		ump = VFSTOUFS(mp);
194		fs = ump->um_fs;
195		devvp = ump->um_devvp;
196		if (fs->fs_ronly == 0 &&
197		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
198			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
199				return (error);
200			/*
201			 * Flush any dirty data.
202			 */
203			if ((error = ffs_sync(mp, MNT_WAIT, td)) != 0) {
204				vn_finished_write(mp);
205				return (error);
206			}
207			/*
208			 * Check for and optionally get rid of files open
209			 * for writing.
210			 */
211			flags = WRITECLOSE;
212			if (mp->mnt_flag & MNT_FORCE)
213				flags |= FORCECLOSE;
214			if (mp->mnt_flag & MNT_SOFTDEP) {
215				error = softdep_flushfiles(mp, flags, td);
216			} else {
217				error = ffs_flushfiles(mp, flags, td);
218			}
219			if (error) {
220				vn_finished_write(mp);
221				return (error);
222			}
223			if (fs->fs_pendingblocks != 0 ||
224			    fs->fs_pendinginodes != 0) {
225				printf("%s: %s: blocks %jd files %d\n",
226				    fs->fs_fsmnt, "update error",
227				    (intmax_t)fs->fs_pendingblocks,
228				    fs->fs_pendinginodes);
229				fs->fs_pendingblocks = 0;
230				fs->fs_pendinginodes = 0;
231			}
232			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
233				fs->fs_clean = 1;
234			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
235				fs->fs_ronly = 0;
236				fs->fs_clean = 0;
237				vn_finished_write(mp);
238				return (error);
239			}
240			vn_finished_write(mp);
241			DROP_GIANT();
242			g_topology_lock();
243			g_access(ump->um_cp, 0, -1, 0);
244			g_topology_unlock();
245			PICKUP_GIANT();
246			fs->fs_ronly = 1;
247			MNT_ILOCK(mp);
248			mp->mnt_flag |= MNT_RDONLY;
249			MNT_IUNLOCK(mp);
250		}
251		if ((mp->mnt_flag & MNT_RELOAD) &&
252		    (error = ffs_reload(mp, td)) != 0)
253			return (error);
254		if (fs->fs_ronly &&
255		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
256			/*
257			 * If upgrade to read-write by non-root, then verify
258			 * that user has necessary permissions on the device.
259			 */
260			if (suser(td)) {
261				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
262				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
263				    td->td_ucred, td)) != 0) {
264					VOP_UNLOCK(devvp, 0, td);
265					return (error);
266				}
267				VOP_UNLOCK(devvp, 0, td);
268			}
269			fs->fs_flags &= ~FS_UNCLEAN;
270			if (fs->fs_clean == 0) {
271				fs->fs_flags |= FS_UNCLEAN;
272				if ((mp->mnt_flag & MNT_FORCE) ||
273				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
274				     (fs->fs_flags & FS_DOSOFTDEP))) {
275					printf("WARNING: %s was not %s\n",
276					   fs->fs_fsmnt, "properly dismounted");
277				} else {
278					printf(
279"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
280					    fs->fs_fsmnt);
281					return (EPERM);
282				}
283			}
284			DROP_GIANT();
285			g_topology_lock();
286			/*
287			 * If we're the root device, we may not have an E count
288			 * yet, get it now.
289			 */
290			if (ump->um_cp->ace == 0)
291				error = g_access(ump->um_cp, 0, 1, 1);
292			else
293				error = g_access(ump->um_cp, 0, 1, 0);
294			g_topology_unlock();
295			PICKUP_GIANT();
296			if (error)
297				return (error);
298			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
299				return (error);
300			fs->fs_ronly = 0;
301			MNT_ILOCK(mp);
302			mp->mnt_flag &= ~MNT_RDONLY;
303			MNT_IUNLOCK(mp);
304			fs->fs_clean = 0;
305			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
306				vn_finished_write(mp);
307				return (error);
308			}
309			/* check to see if we need to start softdep */
310			if ((fs->fs_flags & FS_DOSOFTDEP) &&
311			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
312				vn_finished_write(mp);
313				return (error);
314			}
315			if (fs->fs_snapinum[0] != 0)
316				ffs_snapshot_mount(mp);
317			vn_finished_write(mp);
318		}
319		/*
320		 * Soft updates is incompatible with "async",
321		 * so if we are doing softupdates stop the user
322		 * from setting the async flag in an update.
323		 * Softdep_mount() clears it in an initial mount
324		 * or ro->rw remount.
325		 */
326		if (mp->mnt_flag & MNT_SOFTDEP) {
327			/* XXX: Reset too late ? */
328			MNT_ILOCK(mp);
329			mp->mnt_flag &= ~MNT_ASYNC;
330			MNT_IUNLOCK(mp);
331		}
332		/*
333		 * Keep MNT_ACLS flag if it is stored in superblock.
334		 */
335		if ((fs->fs_flags & FS_ACLS) != 0) {
336			/* XXX: Set too late ? */
337			MNT_ILOCK(mp);
338			mp->mnt_flag |= MNT_ACLS;
339			MNT_IUNLOCK(mp);
340		}
341
342		/*
343		 * If this is a snapshot request, take the snapshot.
344		 */
345		if (mp->mnt_flag & MNT_SNAPSHOT)
346			return (ffs_snapshot(mp, fspec));
347	}
348
349	/*
350	 * Not an update, or updating the name: look up the name
351	 * and verify that it refers to a sensible disk device.
352	 */
353	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
354	if ((error = namei(&ndp)) != 0)
355		return (error);
356	NDFREE(&ndp, NDF_ONLY_PNBUF);
357	devvp = ndp.ni_vp;
358	if (!vn_isdisk(devvp, &error)) {
359		vput(devvp);
360		return (error);
361	}
362
363	/*
364	 * If mount by non-root, then verify that user has necessary
365	 * permissions on the device.
366	 */
367	if (suser(td)) {
368		accessmode = VREAD;
369		if ((mp->mnt_flag & MNT_RDONLY) == 0)
370			accessmode |= VWRITE;
371		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
372			vput(devvp);
373			return (error);
374		}
375	}
376
377	if (mp->mnt_flag & MNT_UPDATE) {
378		/*
379		 * Update only
380		 *
381		 * If it's not the same vnode, or at least the same device
382		 * then it's not correct.
383		 */
384
385		if (devvp->v_rdev != ump->um_devvp->v_rdev)
386			error = EINVAL;	/* needs translation */
387		vput(devvp);
388		if (error)
389			return (error);
390	} else {
391		/*
392		 * New mount
393		 *
394		 * We need the name for the mount point (also used for
395		 * "last mounted on") copied in. If an error occurs,
396		 * the mount point is discarded by the upper level code.
397		 * Note that vfs_mount() populates f_mntonname for us.
398		 */
399		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
400			vrele(devvp);
401			return (error);
402		}
403	}
404	vfs_mountedfrom(mp, fspec);
405	return (0);
406}
407
408/*
409 * Compatibility with old mount system call.
410 */
411
412static int
413ffs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
414{
415	struct ufs_args args;
416	int error;
417
418	if (data == NULL)
419		return (EINVAL);
420	error = copyin(data, &args, sizeof args);
421	if (error)
422		return (error);
423
424	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
425	ma = mount_arg(ma, "export", &args.export, sizeof args.export);
426	error = kernel_mount(ma, flags);
427
428	return (error);
429}
430
431/*
432 * Reload all incore data for a filesystem (used after running fsck on
433 * the root filesystem and finding things to fix). The filesystem must
434 * be mounted read-only.
435 *
436 * Things to do to update the mount:
437 *	1) invalidate all cached meta-data.
438 *	2) re-read superblock from disk.
439 *	3) re-read summary information from disk.
440 *	4) invalidate all inactive vnodes.
441 *	5) invalidate all cached file data.
442 *	6) re-read inode data for all active vnodes.
443 */
444static int
445ffs_reload(struct mount *mp, struct thread *td)
446{
447	struct vnode *vp, *mvp, *devvp;
448	struct inode *ip;
449	void *space;
450	struct buf *bp;
451	struct fs *fs, *newfs;
452	struct ufsmount *ump;
453	ufs2_daddr_t sblockloc;
454	int i, blks, size, error;
455	int32_t *lp;
456
457	if ((mp->mnt_flag & MNT_RDONLY) == 0)
458		return (EINVAL);
459	ump = VFSTOUFS(mp);
460	/*
461	 * Step 1: invalidate all cached meta-data.
462	 */
463	devvp = VFSTOUFS(mp)->um_devvp;
464	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
465	if (vinvalbuf(devvp, 0, td, 0, 0) != 0)
466		panic("ffs_reload: dirty1");
467	VOP_UNLOCK(devvp, 0, td);
468
469	/*
470	 * Step 2: re-read superblock from disk.
471	 */
472	fs = VFSTOUFS(mp)->um_fs;
473	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
474	    NOCRED, &bp)) != 0)
475		return (error);
476	newfs = (struct fs *)bp->b_data;
477	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
478	     newfs->fs_magic != FS_UFS2_MAGIC) ||
479	    newfs->fs_bsize > MAXBSIZE ||
480	    newfs->fs_bsize < sizeof(struct fs)) {
481			brelse(bp);
482			return (EIO);		/* XXX needs translation */
483	}
484	/*
485	 * Copy pointer fields back into superblock before copying in	XXX
486	 * new superblock. These should really be in the ufsmount.	XXX
487	 * Note that important parameters (eg fs_ncg) are unchanged.
488	 */
489	newfs->fs_csp = fs->fs_csp;
490	newfs->fs_maxcluster = fs->fs_maxcluster;
491	newfs->fs_contigdirs = fs->fs_contigdirs;
492	newfs->fs_active = fs->fs_active;
493	/* The file system is still read-only. */
494	newfs->fs_ronly = 1;
495	sblockloc = fs->fs_sblockloc;
496	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
497	brelse(bp);
498	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
499	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
500	UFS_LOCK(ump);
501	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
502		printf("%s: reload pending error: blocks %jd files %d\n",
503		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
504		    fs->fs_pendinginodes);
505		fs->fs_pendingblocks = 0;
506		fs->fs_pendinginodes = 0;
507	}
508	UFS_UNLOCK(ump);
509
510	/*
511	 * Step 3: re-read summary information from disk.
512	 */
513	blks = howmany(fs->fs_cssize, fs->fs_fsize);
514	space = fs->fs_csp;
515	for (i = 0; i < blks; i += fs->fs_frag) {
516		size = fs->fs_bsize;
517		if (i + fs->fs_frag > blks)
518			size = (blks - i) * fs->fs_fsize;
519		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
520		    NOCRED, &bp);
521		if (error)
522			return (error);
523		bcopy(bp->b_data, space, (u_int)size);
524		space = (char *)space + size;
525		brelse(bp);
526	}
527	/*
528	 * We no longer know anything about clusters per cylinder group.
529	 */
530	if (fs->fs_contigsumsize > 0) {
531		lp = fs->fs_maxcluster;
532		for (i = 0; i < fs->fs_ncg; i++)
533			*lp++ = fs->fs_contigsumsize;
534	}
535
536loop:
537	MNT_ILOCK(mp);
538	MNT_VNODE_FOREACH(vp, mp, mvp) {
539		VI_LOCK(vp);
540		if (vp->v_iflag & VI_DOOMED) {
541			VI_UNLOCK(vp);
542			continue;
543		}
544		MNT_IUNLOCK(mp);
545		/*
546		 * Step 4: invalidate all cached file data.
547		 */
548		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
549			MNT_VNODE_FOREACH_ABORT(mp, mvp);
550			goto loop;
551		}
552		if (vinvalbuf(vp, 0, td, 0, 0))
553			panic("ffs_reload: dirty2");
554		/*
555		 * Step 5: re-read inode data for all active vnodes.
556		 */
557		ip = VTOI(vp);
558		error =
559		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
560		    (int)fs->fs_bsize, NOCRED, &bp);
561		if (error) {
562			VOP_UNLOCK(vp, 0, td);
563			vrele(vp);
564			MNT_VNODE_FOREACH_ABORT(mp, mvp);
565			return (error);
566		}
567		ffs_load_inode(bp, ip, fs, ip->i_number);
568		ip->i_effnlink = ip->i_nlink;
569		brelse(bp);
570		VOP_UNLOCK(vp, 0, td);
571		vrele(vp);
572		MNT_ILOCK(mp);
573	}
574	MNT_IUNLOCK(mp);
575	return (0);
576}
577
578/*
579 * Possible superblock locations ordered from most to least likely.
580 */
581static int sblock_try[] = SBLOCKSEARCH;
582
583/*
584 * Common code for mount and mountroot
585 */
586static int
587ffs_mountfs(devvp, mp, td)
588	struct vnode *devvp;
589	struct mount *mp;
590	struct thread *td;
591{
592	struct ufsmount *ump;
593	struct buf *bp;
594	struct fs *fs;
595	struct cdev *dev;
596	void *space;
597	ufs2_daddr_t sblockloc;
598	int error, i, blks, size, ronly;
599	int32_t *lp;
600	struct ucred *cred;
601	struct g_consumer *cp;
602	struct mount *nmp;
603
604	dev = devvp->v_rdev;
605	cred = td ? td->td_ucred : NOCRED;
606
607	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
608	DROP_GIANT();
609	g_topology_lock();
610	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
611
612	/*
613	 * If we are a root mount, drop the E flag so fsck can do its magic.
614	 * We will pick it up again when we remount R/W.
615	 */
616	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
617		error = g_access(cp, 0, 0, -1);
618	g_topology_unlock();
619	PICKUP_GIANT();
620	VOP_UNLOCK(devvp, 0, td);
621	if (error)
622		return (error);
623	if (devvp->v_rdev->si_iosize_max != 0)
624		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
625	if (mp->mnt_iosize_max > MAXPHYS)
626		mp->mnt_iosize_max = MAXPHYS;
627
628	devvp->v_bufobj.bo_private = cp;
629	devvp->v_bufobj.bo_ops = &ffs_ops;
630
631	bp = NULL;
632	ump = NULL;
633	fs = NULL;
634	sblockloc = 0;
635	/*
636	 * Try reading the superblock in each of its possible locations.
637	 */
638	for (i = 0; sblock_try[i] != -1; i++) {
639		if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
640			error = EINVAL;
641			vfs_mount_error(mp,
642			    "Invalid sectorsize %d for superblock size %d",
643			    cp->provider->sectorsize, SBLOCKSIZE);
644			goto out;
645		}
646		if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
647		    cred, &bp)) != 0)
648			goto out;
649		fs = (struct fs *)bp->b_data;
650		sblockloc = sblock_try[i];
651		if ((fs->fs_magic == FS_UFS1_MAGIC ||
652		     (fs->fs_magic == FS_UFS2_MAGIC &&
653		      (fs->fs_sblockloc == sblockloc ||
654		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
655		    fs->fs_bsize <= MAXBSIZE &&
656		    fs->fs_bsize >= sizeof(struct fs))
657			break;
658		brelse(bp);
659		bp = NULL;
660	}
661	if (sblock_try[i] == -1) {
662		error = EINVAL;		/* XXX needs translation */
663		goto out;
664	}
665	fs->fs_fmod = 0;
666	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
667	fs->fs_flags &= ~FS_UNCLEAN;
668	if (fs->fs_clean == 0) {
669		fs->fs_flags |= FS_UNCLEAN;
670		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
671		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
672		     (fs->fs_flags & FS_DOSOFTDEP))) {
673			printf(
674"WARNING: %s was not properly dismounted\n",
675			    fs->fs_fsmnt);
676		} else {
677			printf(
678"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
679			    fs->fs_fsmnt);
680			error = EPERM;
681			goto out;
682		}
683		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
684		    (mp->mnt_flag & MNT_FORCE)) {
685			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
686			    (intmax_t)fs->fs_pendingblocks,
687			    fs->fs_pendinginodes);
688			fs->fs_pendingblocks = 0;
689			fs->fs_pendinginodes = 0;
690		}
691	}
692	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
693		printf("%s: mount pending error: blocks %jd files %d\n",
694		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
695		    fs->fs_pendinginodes);
696		fs->fs_pendingblocks = 0;
697		fs->fs_pendinginodes = 0;
698	}
699	if ((fs->fs_flags & FS_GJOURNAL) != 0) {
700#ifdef UFS_GJOURNAL
701		/*
702		 * Get journal provider name.
703		 */
704		size = 1024;
705		mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
706		if (g_io_getattr("GJOURNAL::provider", cp, &size,
707		    mp->mnt_gjprovider) == 0) {
708			mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
709			    M_UFSMNT, M_WAITOK);
710			MNT_ILOCK(mp);
711			mp->mnt_flag |= MNT_GJOURNAL;
712			MNT_IUNLOCK(mp);
713		} else {
714			printf(
715"WARNING: %s: GJOURNAL flag on fs but no gjournal provider below\n",
716			    mp->mnt_stat.f_mntonname);
717			free(mp->mnt_gjprovider, M_UFSMNT);
718			mp->mnt_gjprovider = NULL;
719		}
720#else
721		printf(
722"WARNING: %s: GJOURNAL flag on fs but no UFS_GJOURNAL support\n",
723		    mp->mnt_stat.f_mntonname);
724#endif
725	} else {
726		mp->mnt_gjprovider = NULL;
727	}
728	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
729	ump->um_cp = cp;
730	ump->um_bo = &devvp->v_bufobj;
731	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
732	if (fs->fs_magic == FS_UFS1_MAGIC) {
733		ump->um_fstype = UFS1;
734		ump->um_balloc = ffs_balloc_ufs1;
735	} else {
736		ump->um_fstype = UFS2;
737		ump->um_balloc = ffs_balloc_ufs2;
738	}
739	ump->um_blkatoff = ffs_blkatoff;
740	ump->um_truncate = ffs_truncate;
741	ump->um_update = ffs_update;
742	ump->um_valloc = ffs_valloc;
743	ump->um_vfree = ffs_vfree;
744	ump->um_ifree = ffs_ifree;
745	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
746	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
747	if (fs->fs_sbsize < SBLOCKSIZE)
748		bp->b_flags |= B_INVAL | B_NOCACHE;
749	brelse(bp);
750	bp = NULL;
751	fs = ump->um_fs;
752	ffs_oldfscompat_read(fs, ump, sblockloc);
753	fs->fs_ronly = ronly;
754	size = fs->fs_cssize;
755	blks = howmany(size, fs->fs_fsize);
756	if (fs->fs_contigsumsize > 0)
757		size += fs->fs_ncg * sizeof(int32_t);
758	size += fs->fs_ncg * sizeof(u_int8_t);
759	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
760	fs->fs_csp = space;
761	for (i = 0; i < blks; i += fs->fs_frag) {
762		size = fs->fs_bsize;
763		if (i + fs->fs_frag > blks)
764			size = (blks - i) * fs->fs_fsize;
765		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
766		    cred, &bp)) != 0) {
767			free(fs->fs_csp, M_UFSMNT);
768			goto out;
769		}
770		bcopy(bp->b_data, space, (u_int)size);
771		space = (char *)space + size;
772		brelse(bp);
773		bp = NULL;
774	}
775	if (fs->fs_contigsumsize > 0) {
776		fs->fs_maxcluster = lp = space;
777		for (i = 0; i < fs->fs_ncg; i++)
778			*lp++ = fs->fs_contigsumsize;
779		space = lp;
780	}
781	size = fs->fs_ncg * sizeof(u_int8_t);
782	fs->fs_contigdirs = (u_int8_t *)space;
783	bzero(fs->fs_contigdirs, size);
784	fs->fs_active = NULL;
785	mp->mnt_data = (qaddr_t)ump;
786	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
787	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
788	nmp = NULL;
789	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
790	    (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
791		if (nmp)
792			vfs_rel(nmp);
793		vfs_getnewfsid(mp);
794	}
795	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
796	MNT_ILOCK(mp);
797	mp->mnt_flag |= MNT_LOCAL;
798	MNT_IUNLOCK(mp);
799	if ((fs->fs_flags & FS_MULTILABEL) != 0) {
800#ifdef MAC
801		MNT_ILOCK(mp);
802		mp->mnt_flag |= MNT_MULTILABEL;
803		MNT_IUNLOCK(mp);
804#else
805		printf(
806"WARNING: %s: multilabel flag on fs but no MAC support\n",
807		    mp->mnt_stat.f_mntonname);
808#endif
809	}
810	if ((fs->fs_flags & FS_ACLS) != 0) {
811#ifdef UFS_ACL
812		MNT_ILOCK(mp);
813		mp->mnt_flag |= MNT_ACLS;
814		MNT_IUNLOCK(mp);
815#else
816		printf(
817"WARNING: %s: ACLs flag on fs but no ACLs support\n",
818		    mp->mnt_stat.f_mntonname);
819#endif
820	}
821	ump->um_mountp = mp;
822	ump->um_dev = dev;
823	ump->um_devvp = devvp;
824	ump->um_nindir = fs->fs_nindir;
825	ump->um_bptrtodb = fs->fs_fsbtodb;
826	ump->um_seqinc = fs->fs_frag;
827	for (i = 0; i < MAXQUOTAS; i++)
828		ump->um_quotas[i] = NULLVP;
829#ifdef UFS_EXTATTR
830	ufs_extattr_uepm_init(&ump->um_extattr);
831#endif
832	/*
833	 * Set FS local "last mounted on" information (NULL pad)
834	 */
835	bzero(fs->fs_fsmnt, MAXMNTLEN);
836	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
837
838	if( mp->mnt_flag & MNT_ROOTFS) {
839		/*
840		 * Root mount; update timestamp in mount structure.
841		 * this will be used by the common root mount code
842		 * to update the system clock.
843		 */
844		mp->mnt_time = fs->fs_time;
845	}
846
847	if (ronly == 0) {
848		if ((fs->fs_flags & FS_DOSOFTDEP) &&
849		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
850			free(fs->fs_csp, M_UFSMNT);
851			goto out;
852		}
853		if (fs->fs_snapinum[0] != 0)
854			ffs_snapshot_mount(mp);
855		fs->fs_fmod = 1;
856		fs->fs_clean = 0;
857		(void) ffs_sbupdate(ump, MNT_WAIT, 0);
858	}
859	/*
860	 * Initialize filesystem stat information in mount struct.
861	 */
862#ifdef UFS_EXTATTR
863#ifdef UFS_EXTATTR_AUTOSTART
864	/*
865	 *
866	 * Auto-starting does the following:
867	 *	- check for /.attribute in the fs, and extattr_start if so
868	 *	- for each file in .attribute, enable that file with
869	 * 	  an attribute of the same name.
870	 * Not clear how to report errors -- probably eat them.
871	 * This would all happen while the filesystem was busy/not
872	 * available, so would effectively be "atomic".
873	 */
874	(void) ufs_extattr_autostart(mp, td);
875#endif /* !UFS_EXTATTR_AUTOSTART */
876#endif /* !UFS_EXTATTR */
877#ifdef QUOTA
878	/*
879	 * Our bufobj must require giant for snapshots when quotas are
880	 * enabled.
881	 */
882	BO_LOCK(&devvp->v_bufobj);
883	devvp->v_bufobj.bo_flag |= BO_NEEDSGIANT;
884	BO_UNLOCK(&devvp->v_bufobj);
885#else
886	MNT_ILOCK(mp);
887	mp->mnt_kern_flag |= MNTK_MPSAFE;
888	MNT_IUNLOCK(mp);
889#endif
890	return (0);
891out:
892	if (bp)
893		brelse(bp);
894	if (cp != NULL) {
895		DROP_GIANT();
896		g_topology_lock();
897		g_vfs_close(cp, td);
898		g_topology_unlock();
899		PICKUP_GIANT();
900	}
901	if (ump) {
902		mtx_destroy(UFS_MTX(ump));
903		if (mp->mnt_gjprovider != NULL) {
904			free(mp->mnt_gjprovider, M_UFSMNT);
905			mp->mnt_gjprovider = NULL;
906		}
907		free(ump->um_fs, M_UFSMNT);
908		free(ump, M_UFSMNT);
909		mp->mnt_data = (qaddr_t)0;
910	}
911	return (error);
912}
913
914#include <sys/sysctl.h>
915static int bigcgs = 0;
916SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
917
918/*
919 * Sanity checks for loading old filesystem superblocks.
920 * See ffs_oldfscompat_write below for unwound actions.
921 *
922 * XXX - Parts get retired eventually.
923 * Unfortunately new bits get added.
924 */
925static void
926ffs_oldfscompat_read(fs, ump, sblockloc)
927	struct fs *fs;
928	struct ufsmount *ump;
929	ufs2_daddr_t sblockloc;
930{
931	off_t maxfilesize;
932
933	/*
934	 * If not yet done, update fs_flags location and value of fs_sblockloc.
935	 */
936	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
937		fs->fs_flags = fs->fs_old_flags;
938		fs->fs_old_flags |= FS_FLAGS_UPDATED;
939		fs->fs_sblockloc = sblockloc;
940	}
941	/*
942	 * If not yet done, update UFS1 superblock with new wider fields.
943	 */
944	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
945		fs->fs_maxbsize = fs->fs_bsize;
946		fs->fs_time = fs->fs_old_time;
947		fs->fs_size = fs->fs_old_size;
948		fs->fs_dsize = fs->fs_old_dsize;
949		fs->fs_csaddr = fs->fs_old_csaddr;
950		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
951		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
952		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
953		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
954	}
955	if (fs->fs_magic == FS_UFS1_MAGIC &&
956	    fs->fs_old_inodefmt < FS_44INODEFMT) {
957		fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
958		fs->fs_qbmask = ~fs->fs_bmask;
959		fs->fs_qfmask = ~fs->fs_fmask;
960	}
961	if (fs->fs_magic == FS_UFS1_MAGIC) {
962		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
963		maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
964		if (fs->fs_maxfilesize > maxfilesize)
965			fs->fs_maxfilesize = maxfilesize;
966	}
967	/* Compatibility for old filesystems */
968	if (fs->fs_avgfilesize <= 0)
969		fs->fs_avgfilesize = AVFILESIZ;
970	if (fs->fs_avgfpdir <= 0)
971		fs->fs_avgfpdir = AFPDIR;
972	if (bigcgs) {
973		fs->fs_save_cgsize = fs->fs_cgsize;
974		fs->fs_cgsize = fs->fs_bsize;
975	}
976}
977
978/*
979 * Unwinding superblock updates for old filesystems.
980 * See ffs_oldfscompat_read above for details.
981 *
982 * XXX - Parts get retired eventually.
983 * Unfortunately new bits get added.
984 */
985static void
986ffs_oldfscompat_write(fs, ump)
987	struct fs *fs;
988	struct ufsmount *ump;
989{
990
991	/*
992	 * Copy back UFS2 updated fields that UFS1 inspects.
993	 */
994	if (fs->fs_magic == FS_UFS1_MAGIC) {
995		fs->fs_old_time = fs->fs_time;
996		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
997		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
998		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
999		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1000		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
1001	}
1002	if (bigcgs) {
1003		fs->fs_cgsize = fs->fs_save_cgsize;
1004		fs->fs_save_cgsize = 0;
1005	}
1006}
1007
1008/*
1009 * unmount system call
1010 */
1011static int
1012ffs_unmount(mp, mntflags, td)
1013	struct mount *mp;
1014	int mntflags;
1015	struct thread *td;
1016{
1017	struct ufsmount *ump = VFSTOUFS(mp);
1018	struct fs *fs;
1019	int error, flags;
1020
1021	flags = 0;
1022	if (mntflags & MNT_FORCE) {
1023		flags |= FORCECLOSE;
1024	}
1025#ifdef UFS_EXTATTR
1026	if ((error = ufs_extattr_stop(mp, td))) {
1027		if (error != EOPNOTSUPP)
1028			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
1029			    error);
1030	} else {
1031		ufs_extattr_uepm_destroy(&ump->um_extattr);
1032	}
1033#endif
1034	if (mp->mnt_flag & MNT_SOFTDEP) {
1035		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
1036			return (error);
1037	} else {
1038		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
1039			return (error);
1040	}
1041	fs = ump->um_fs;
1042	UFS_LOCK(ump);
1043	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1044		printf("%s: unmount pending error: blocks %jd files %d\n",
1045		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
1046		    fs->fs_pendinginodes);
1047		fs->fs_pendingblocks = 0;
1048		fs->fs_pendinginodes = 0;
1049	}
1050	UFS_UNLOCK(ump);
1051	if (fs->fs_ronly == 0) {
1052		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
1053		error = ffs_sbupdate(ump, MNT_WAIT, 0);
1054		if (error) {
1055			fs->fs_clean = 0;
1056			return (error);
1057		}
1058	}
1059	DROP_GIANT();
1060	g_topology_lock();
1061	g_vfs_close(ump->um_cp, td);
1062	g_topology_unlock();
1063	PICKUP_GIANT();
1064	vrele(ump->um_devvp);
1065	mtx_destroy(UFS_MTX(ump));
1066	if (mp->mnt_gjprovider != NULL) {
1067		free(mp->mnt_gjprovider, M_UFSMNT);
1068		mp->mnt_gjprovider = NULL;
1069	}
1070	free(fs->fs_csp, M_UFSMNT);
1071	free(fs, M_UFSMNT);
1072	free(ump, M_UFSMNT);
1073	mp->mnt_data = (qaddr_t)0;
1074	MNT_ILOCK(mp);
1075	mp->mnt_flag &= ~MNT_LOCAL;
1076	MNT_IUNLOCK(mp);
1077	return (error);
1078}
1079
1080/*
1081 * Flush out all the files in a filesystem.
1082 */
1083int
1084ffs_flushfiles(mp, flags, td)
1085	struct mount *mp;
1086	int flags;
1087	struct thread *td;
1088{
1089	struct ufsmount *ump;
1090	int error;
1091
1092	ump = VFSTOUFS(mp);
1093#ifdef QUOTA
1094	if (mp->mnt_flag & MNT_QUOTA) {
1095		int i;
1096		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
1097		if (error)
1098			return (error);
1099		for (i = 0; i < MAXQUOTAS; i++) {
1100			if (ump->um_quotas[i] == NULLVP)
1101				continue;
1102			quotaoff(td, mp, i);
1103		}
1104		/*
1105		 * Here we fall through to vflush again to ensure
1106		 * that we have gotten rid of all the system vnodes.
1107		 */
1108	}
1109#endif
1110	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1111	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1112		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1113			return (error);
1114		ffs_snapshot_unmount(mp);
1115		flags |= FORCECLOSE;
1116		/*
1117		 * Here we fall through to vflush again to ensure
1118		 * that we have gotten rid of all the system vnodes.
1119		 */
1120	}
1121        /*
1122	 * Flush all the files.
1123	 */
1124	if ((error = vflush(mp, 0, flags, td)) != 0)
1125		return (error);
1126	/*
1127	 * Flush filesystem metadata.
1128	 */
1129	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1130	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
1131	VOP_UNLOCK(ump->um_devvp, 0, td);
1132	return (error);
1133}
1134
1135/*
1136 * Get filesystem statistics.
1137 */
1138static int
1139ffs_statfs(mp, sbp, td)
1140	struct mount *mp;
1141	struct statfs *sbp;
1142	struct thread *td;
1143{
1144	struct ufsmount *ump;
1145	struct fs *fs;
1146
1147	ump = VFSTOUFS(mp);
1148	fs = ump->um_fs;
1149	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1150		panic("ffs_statfs");
1151	sbp->f_version = STATFS_VERSION;
1152	sbp->f_bsize = fs->fs_fsize;
1153	sbp->f_iosize = fs->fs_bsize;
1154	sbp->f_blocks = fs->fs_dsize;
1155	UFS_LOCK(ump);
1156	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1157	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1158	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1159	    dbtofsb(fs, fs->fs_pendingblocks);
1160	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1161	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1162	UFS_UNLOCK(ump);
1163	sbp->f_namemax = NAME_MAX;
1164	return (0);
1165}
1166
1167/*
1168 * Go through the disk queues to initiate sandbagged IO;
1169 * go through the inodes to write those that have been modified;
1170 * initiate the writing of the super block if it has been modified.
1171 *
1172 * Note: we are always called with the filesystem marked `MPBUSY'.
1173 */
1174static int
1175ffs_sync(mp, waitfor, td)
1176	struct mount *mp;
1177	int waitfor;
1178	struct thread *td;
1179{
1180	struct vnode *mvp, *vp, *devvp;
1181	struct inode *ip;
1182	struct ufsmount *ump = VFSTOUFS(mp);
1183	struct fs *fs;
1184	int error, count, wait, lockreq, allerror = 0;
1185	int suspend;
1186	int suspended;
1187	int secondary_writes;
1188	int secondary_accwrites;
1189	int softdep_deps;
1190	int softdep_accdeps;
1191	struct bufobj *bo;
1192
1193	fs = ump->um_fs;
1194	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1195		printf("fs = %s\n", fs->fs_fsmnt);
1196		panic("ffs_sync: rofs mod");
1197	}
1198	/*
1199	 * Write back each (modified) inode.
1200	 */
1201	wait = 0;
1202	suspend = 0;
1203	suspended = 0;
1204	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1205	if (waitfor == MNT_SUSPEND) {
1206		suspend = 1;
1207		waitfor = MNT_WAIT;
1208	}
1209	if (waitfor == MNT_WAIT) {
1210		wait = 1;
1211		lockreq = LK_EXCLUSIVE;
1212	}
1213	lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
1214	MNT_ILOCK(mp);
1215loop:
1216	/* Grab snapshot of secondary write counts */
1217	secondary_writes = mp->mnt_secondary_writes;
1218	secondary_accwrites = mp->mnt_secondary_accwrites;
1219
1220	/* Grab snapshot of softdep dependency counts */
1221	MNT_IUNLOCK(mp);
1222	softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
1223	MNT_ILOCK(mp);
1224
1225	MNT_VNODE_FOREACH(vp, mp, mvp) {
1226		/*
1227		 * Depend on the mntvnode_slock to keep things stable enough
1228		 * for a quick test.  Since there might be hundreds of
1229		 * thousands of vnodes, we cannot afford even a subroutine
1230		 * call unless there's a good chance that we have work to do.
1231		 */
1232		VI_LOCK(vp);
1233		if (vp->v_iflag & VI_DOOMED) {
1234			VI_UNLOCK(vp);
1235			continue;
1236		}
1237		ip = VTOI(vp);
1238		if (vp->v_type == VNON || ((ip->i_flag &
1239		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1240		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
1241			VI_UNLOCK(vp);
1242			continue;
1243		}
1244		MNT_IUNLOCK(mp);
1245		if ((error = vget(vp, lockreq, td)) != 0) {
1246			MNT_ILOCK(mp);
1247			if (error == ENOENT || error == ENOLCK) {
1248				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1249				goto loop;
1250			}
1251			continue;
1252		}
1253		if ((error = ffs_syncvnode(vp, waitfor)) != 0)
1254			allerror = error;
1255		vput(vp);
1256		MNT_ILOCK(mp);
1257	}
1258	MNT_IUNLOCK(mp);
1259	/*
1260	 * Force stale filesystem control information to be flushed.
1261	 */
1262	if (waitfor == MNT_WAIT) {
1263		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1264			allerror = error;
1265		/* Flushed work items may create new vnodes to clean */
1266		if (allerror == 0 && count) {
1267			MNT_ILOCK(mp);
1268			goto loop;
1269		}
1270	}
1271#ifdef QUOTA
1272	qsync(mp);
1273#endif
1274	devvp = ump->um_devvp;
1275	VI_LOCK(devvp);
1276	bo = &devvp->v_bufobj;
1277	if (waitfor != MNT_LAZY &&
1278	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
1279		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1280		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
1281			allerror = error;
1282		VOP_UNLOCK(devvp, 0, td);
1283		if (allerror == 0 && waitfor == MNT_WAIT) {
1284			MNT_ILOCK(mp);
1285			goto loop;
1286		}
1287	} else if (suspend != 0) {
1288		if (softdep_check_suspend(mp,
1289					  devvp,
1290					  softdep_deps,
1291					  softdep_accdeps,
1292					  secondary_writes,
1293					  secondary_accwrites) != 0)
1294			goto loop;	/* More work needed */
1295		mtx_assert(MNT_MTX(mp), MA_OWNED);
1296		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
1297		MNT_IUNLOCK(mp);
1298		suspended = 1;
1299	} else
1300		VI_UNLOCK(devvp);
1301	/*
1302	 * Write back modified superblock.
1303	 */
1304	if (fs->fs_fmod != 0 &&
1305	    (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
1306		allerror = error;
1307	return (allerror);
1308}
1309
/*
 * Return in *vpp the vnode for inode number `ino' on mount `mp',
 * locked according to `flags'.
 *
 * If vfs_hash_get() finds the vnode, or fails, its result is returned
 * as is.  Otherwise a new in-core inode and vnode are allocated,
 * entered into the hash with vfs_hash_insert(), and filled in from the
 * on-disk inode read with bread().
 *
 * Returns 0 with *vpp set, or an errno.  The failure paths in this
 * function set *vpp to NULL explicitly; the two returns right after
 * the hash calls leave *vpp as those calls set it.
 */
int
ffs_vget(mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct cdev *dev;
	int error;

	/* Fast path: the vnode may already be in the hash. */
	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/*
	 * We must promote to an exclusive lock for vnode creation.  This
	 * can happen if lookup is passed LOCKSHARED.
	 */
	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
		flags &= ~LK_TYPE_MASK;
		flags |= LK_EXCLUSIVE;
	}

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such rare case as simultaneous creation of vnode
	 * for same ino by different processes. We just allow them to race
	 * and check later to decide who wins. Let the race begin!
	 */

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	fs = ump->um_fs;

	/*
	 * If this MALLOC() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);

	/* Allocate a new vnode/inode. */
	if (fs->fs_magic == FS_UFS1_MAGIC)
		error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
	else
		error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
	if (error) {
		/* No vnode exists yet, so the inode is freed directly. */
		*vpp = NULL;
		uma_zfree(uma_inode, ip);
		return (error);
	}
	/*
	 * FFS supports recursive and shared locking.
	 */
	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
	vp->v_vnlock->lk_flags &= ~LK_NOSHARE;
	/* Tie the vnode and the in-core inode together. */
	vp->v_data = ip;
	vp->v_bufobj.bo_bsize = fs->fs_bsize;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif

	/*
	 * Enter the vnode into the hash.  A non-NULL *vpp on return
	 * presumably means another thread won the creation race and its
	 * vnode is handed back instead -- TODO confirm against
	 * vfs_hash_insert().
	 */
	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/* Allocate the in-core copy of the dinode for this format. */
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
	else
		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	if (ip->i_ump->um_fstype == UFS1)
		error = ufs_vinit(mp, &ffs_fifoops1, &vp);
	else
		error = ufs_vinit(mp, &ffs_fifoops2, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/*
	 * Finish inode initialization.
	 */

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 * The new number is written back to the dinode, and the inode
	 * marked modified, only when the mount is not read-only.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = arc4random() / 2 + 1;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			ip->i_flag |= IN_MODIFIED;
			DIP_SET(ip, i_gen, ip->i_gen);
		}
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
	}						/* XXX */

#ifdef MAC
	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
		/*
		 * If this vnode is already allocated, and we're running
		 * multi-label, attempt to perform a label association
		 * from the extended attributes on the inode.
		 */
		error = mac_associate_vnode_extattr(mp, vp);
		if (error) {
			/* ufs_inactive will release ip->i_devvp ref. */
			vput(vp);
			*vpp = NULL;
			return (error);
		}
	}
#endif

	*vpp = vp;
	return (0);
}
1477
1478/*
1479 * File handle to vnode
1480 *
1481 * Have to be really careful about stale file handles:
1482 * - check that the inode number is valid
1483 * - call ffs_vget() to get the locked inode
1484 * - check for an unallocated inode (i_mode == 0)
1485 * - check that the given client host has export rights and return
1486 *   those rights via. exflagsp and credanonp
1487 */
1488static int
1489ffs_fhtovp(mp, fhp, vpp)
1490	struct mount *mp;
1491	struct fid *fhp;
1492	struct vnode **vpp;
1493{
1494	struct ufid *ufhp;
1495	struct fs *fs;
1496
1497	ufhp = (struct ufid *)fhp;
1498	fs = VFSTOUFS(mp)->um_fs;
1499	if (ufhp->ufid_ino < ROOTINO ||
1500	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1501		return (ESTALE);
1502	return (ufs_fhtovp(mp, ufhp, vpp));
1503}
1504
1505/*
1506 * Vnode pointer to File handle
1507 */
1508/* ARGSUSED */
1509static int
1510ffs_vptofh(vp, fhp)
1511	struct vnode *vp;
1512	struct fid *fhp;
1513{
1514	struct inode *ip;
1515	struct ufid *ufhp;
1516
1517	ip = VTOI(vp);
1518	ufhp = (struct ufid *)fhp;
1519	ufhp->ufid_len = sizeof(struct ufid);
1520	ufhp->ufid_ino = ip->i_number;
1521	ufhp->ufid_gen = ip->i_gen;
1522	return (0);
1523}
1524
/*
 * Initialize the filesystem.
 *
 * Sets up soft updates first, then runs the UFS-level initialization
 * and returns its result.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1536
/*
 * Undo the work of ffs_init().
 *
 * Runs in the opposite order: the UFS layer is shut down first, then
 * soft updates.  The value returned is the one from ufs_uninit().
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	error = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (error);
}
1550
1551/*
1552 * Write a superblock and associated information back to disk.
1553 */
1554int
1555ffs_sbupdate(mp, waitfor, suspended)
1556	struct ufsmount *mp;
1557	int waitfor;
1558	int suspended;
1559{
1560	struct fs *fs = mp->um_fs;
1561	struct buf *sbbp;
1562	struct buf *bp;
1563	int blks;
1564	void *space;
1565	int i, size, error, allerror = 0;
1566
1567	if (fs->fs_ronly == 1 &&
1568	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1569	    (MNT_RDONLY | MNT_UPDATE))
1570		panic("ffs_sbupdate: write read-only filesystem");
1571	/*
1572	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
1573	 */
1574	sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1575	    0, 0, 0);
1576	/*
1577	 * First write back the summary information.
1578	 */
1579	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1580	space = fs->fs_csp;
1581	for (i = 0; i < blks; i += fs->fs_frag) {
1582		size = fs->fs_bsize;
1583		if (i + fs->fs_frag > blks)
1584			size = (blks - i) * fs->fs_fsize;
1585		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1586		    size, 0, 0, 0);
1587		bcopy(space, bp->b_data, (u_int)size);
1588		space = (char *)space + size;
1589		if (suspended)
1590			bp->b_flags |= B_VALIDSUSPWRT;
1591		if (waitfor != MNT_WAIT)
1592			bawrite(bp);
1593		else if ((error = bwrite(bp)) != 0)
1594			allerror = error;
1595	}
1596	/*
1597	 * Now write back the superblock itself. If any errors occurred
1598	 * up to this point, then fail so that the superblock avoids
1599	 * being written out as clean.
1600	 */
1601	if (allerror) {
1602		brelse(sbbp);
1603		return (allerror);
1604	}
1605	bp = sbbp;
1606	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1607	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1608		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1609		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1610		fs->fs_sblockloc = SBLOCK_UFS1;
1611	}
1612	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1613	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1614		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1615		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1616		fs->fs_sblockloc = SBLOCK_UFS2;
1617	}
1618	fs->fs_fmod = 0;
1619	fs->fs_time = time_second;
1620	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1621	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1622	if (suspended)
1623		bp->b_flags |= B_VALIDSUSPWRT;
1624	if (waitfor != MNT_WAIT)
1625		bawrite(bp);
1626	else if ((error = bwrite(bp)) != 0)
1627		allerror = error;
1628	return (allerror);
1629}
1630
/*
 * Extended attribute control entry point.  Hands the request to the
 * UFS implementation when the kernel is built with UFS_EXTATTR, and to
 * the VFS default otherwise; the callee's result is returned as is.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1644
1645static void
1646ffs_ifree(struct ufsmount *ump, struct inode *ip)
1647{
1648
1649	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1650		uma_zfree(uma_ufs1, ip->i_din1);
1651	else if (ip->i_din2 != NULL)
1652		uma_zfree(uma_ufs2, ip->i_din2);
1653	uma_zfree(uma_inode, ip);
1654}
1655
1656static int dobkgrdwrite = 1;
1657SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
1658    "Do background writes (honoring the BV_BKGRDWRITE flag)?");
1659
1660/*
1661 * Complete a background write started from bwrite.
1662 */
1663static void
1664ffs_backgroundwritedone(struct buf *bp)
1665{
1666	struct bufobj *bufobj;
1667	struct buf *origbp;
1668
1669	/*
1670	 * Find the original buffer that we are writing.
1671	 */
1672	bufobj = bp->b_bufobj;
1673	BO_LOCK(bufobj);
1674	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
1675		panic("backgroundwritedone: lost buffer");
1676	/* Grab an extra reference to be dropped by the bufdone() below. */
1677	bufobj_wrefl(bufobj);
1678	BO_UNLOCK(bufobj);
1679	/*
1680	 * Process dependencies then return any unfinished ones.
1681	 */
1682	if (LIST_FIRST(&bp->b_dep) != NULL)
1683		buf_complete(bp);
1684#ifdef SOFTUPDATES
1685	if (LIST_FIRST(&bp->b_dep) != NULL)
1686		softdep_move_dependencies(bp, origbp);
1687#endif
1688	/*
1689	 * This buffer is marked B_NOCACHE so when it is released
1690	 * by biodone it will be tossed.
1691	 */
1692	bp->b_flags |= B_NOCACHE;
1693	bp->b_flags &= ~B_CACHE;
1694	bufdone(bp);
1695	BO_LOCK(bufobj);
1696	/*
1697	 * Clear the BV_BKGRDINPROG flag in the original buffer
1698	 * and awaken it if it is waiting for the write to complete.
1699	 * If BV_BKGRDINPROG is not set in the original buffer it must
1700	 * have been released and re-instantiated - which is not legal.
1701	 */
1702	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
1703	    ("backgroundwritedone: lost buffer2"));
1704	origbp->b_vflags &= ~BV_BKGRDINPROG;
1705	if (origbp->b_vflags & BV_BKGRDWAIT) {
1706		origbp->b_vflags &= ~BV_BKGRDWAIT;
1707		wakeup(&origbp->b_xflags);
1708	}
1709	BO_UNLOCK(bufobj);
1710}
1711
1712
1713/*
1714 * Write, release buffer on completion.  (Done by iodone
1715 * if async).  Do not bother writing anything if the buffer
1716 * is invalid.
1717 *
1718 * Note that we set B_CACHE here, indicating that buffer is
1719 * fully valid and thus cacheable.  This is true even of NFS
1720 * now so we set it generally.  This could be set either here
1721 * or in biodone() since the I/O is synchronous.  We put it
1722 * here.
1723 */
1724static int
1725ffs_bufwrite(struct buf *bp)
1726{
1727	int oldflags, s;
1728	struct buf *newbp;
1729
1730	CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
1731	if (bp->b_flags & B_INVAL) {
1732		brelse(bp);
1733		return (0);
1734	}
1735
1736	oldflags = bp->b_flags;
1737
1738	if (BUF_REFCNT(bp) == 0)
1739		panic("bufwrite: buffer is not busy???");
1740	s = splbio();
1741	/*
1742	 * If a background write is already in progress, delay
1743	 * writing this block if it is asynchronous. Otherwise
1744	 * wait for the background write to complete.
1745	 */
1746	BO_LOCK(bp->b_bufobj);
1747	if (bp->b_vflags & BV_BKGRDINPROG) {
1748		if (bp->b_flags & B_ASYNC) {
1749			BO_UNLOCK(bp->b_bufobj);
1750			splx(s);
1751			bdwrite(bp);
1752			return (0);
1753		}
1754		bp->b_vflags |= BV_BKGRDWAIT;
1755		msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
1756		if (bp->b_vflags & BV_BKGRDINPROG)
1757			panic("bufwrite: still writing");
1758	}
1759	BO_UNLOCK(bp->b_bufobj);
1760
1761	/* Mark the buffer clean */
1762	bundirty(bp);
1763
1764	/*
1765	 * If this buffer is marked for background writing and we
1766	 * do not have to wait for it, make a copy and write the
1767	 * copy so as to leave this buffer ready for further use.
1768	 *
1769	 * This optimization eats a lot of memory.  If we have a page
1770	 * or buffer shortfall we can't do it.
1771	 */
1772	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
1773	    (bp->b_flags & B_ASYNC) &&
1774	    !vm_page_count_severe() &&
1775	    !buf_dirty_count_severe()) {
1776		KASSERT(bp->b_iodone == NULL,
1777		    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
1778
1779		/* get a new block */
1780		newbp = geteblk(bp->b_bufsize);
1781
1782		/*
1783		 * set it to be identical to the old block.  We have to
1784		 * set b_lblkno and BKGRDMARKER before calling bgetvp()
1785		 * to avoid confusing the splay tree and gbincore().
1786		 */
1787		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
1788		newbp->b_lblkno = bp->b_lblkno;
1789		newbp->b_xflags |= BX_BKGRDMARKER;
1790		BO_LOCK(bp->b_bufobj);
1791		bp->b_vflags |= BV_BKGRDINPROG;
1792		bgetvp(bp->b_vp, newbp);
1793		BO_UNLOCK(bp->b_bufobj);
1794		newbp->b_bufobj = &bp->b_vp->v_bufobj;
1795		newbp->b_blkno = bp->b_blkno;
1796		newbp->b_offset = bp->b_offset;
1797		newbp->b_iodone = ffs_backgroundwritedone;
1798		newbp->b_flags |= B_ASYNC;
1799		newbp->b_flags &= ~B_INVAL;
1800
1801#ifdef SOFTUPDATES
1802		/* move over the dependencies */
1803		if (LIST_FIRST(&bp->b_dep) != NULL)
1804			softdep_move_dependencies(bp, newbp);
1805#endif
1806
1807		/*
1808		 * Initiate write on the copy, release the original to
1809		 * the B_LOCKED queue so that it cannot go away until
1810		 * the background write completes. If not locked it could go
1811		 * away and then be reconstituted while it was being written.
1812		 * If the reconstituted buffer were written, we could end up
1813		 * with two background copies being written at the same time.
1814		 */
1815		bqrelse(bp);
1816		bp = newbp;
1817	}
1818
1819	/* Let the normal bufwrite do the rest for us */
1820	return (bufwrite(bp));
1821}
1822
1823
1824static void
1825ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
1826{
1827	struct vnode *vp;
1828	int error;
1829	struct buf *tbp;
1830
1831	vp = bo->__bo_vnode;
1832	if (bp->b_iocmd == BIO_WRITE) {
1833		if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
1834		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
1835		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
1836			panic("ffs_geom_strategy: bad I/O");
1837		bp->b_flags &= ~B_VALIDSUSPWRT;
1838		if ((vp->v_vflag & VV_COPYONWRITE) &&
1839		    vp->v_rdev->si_snapdata != NULL) {
1840			if ((bp->b_flags & B_CLUSTER) != 0) {
1841				runningbufwakeup(bp);
1842				TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1843					      b_cluster.cluster_entry) {
1844					error = ffs_copyonwrite(vp, tbp);
1845					if (error != 0 &&
1846					    error != EOPNOTSUPP) {
1847						bp->b_error = error;
1848						bp->b_ioflags |= BIO_ERROR;
1849						bufdone(bp);
1850						return;
1851					}
1852				}
1853				bp->b_runningbufspace = bp->b_bufsize;
1854				atomic_add_int(&runningbufspace,
1855					       bp->b_runningbufspace);
1856			} else {
1857				error = ffs_copyonwrite(vp, bp);
1858				if (error != 0 && error != EOPNOTSUPP) {
1859					bp->b_error = error;
1860					bp->b_ioflags |= BIO_ERROR;
1861					bufdone(bp);
1862					return;
1863				}
1864			}
1865		}
1866#ifdef SOFTUPDATES
1867		if ((bp->b_flags & B_CLUSTER) != 0) {
1868			TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1869				      b_cluster.cluster_entry) {
1870				if (LIST_FIRST(&tbp->b_dep) != NULL)
1871					buf_start(tbp);
1872			}
1873		} else {
1874			if (LIST_FIRST(&bp->b_dep) != NULL)
1875				buf_start(bp);
1876		}
1877
1878#endif
1879	}
1880	g_vfs_strategy(bo, bp);
1881}
1882